{smcl}
{com}{sf}{ul off}{txt}{.-}
      name:  {res}<unnamed>
       {txt}log:  {res}H:\Dropbox\work\replicationpack\logs\1_CleanData.smcl
  {txt}log type:  {res}smcl
 {txt}opened on:  {res}29 Nov 2024, 17:22:22
{txt}
{com}. 
. ************************************************************************************************************
. ********************************* Parse and link addresses ************************************/
. ************************************************************************************************************
. 
. **************************** Clean addresses in criminal courts data *********************************
. 
. *-------------------------------
. ** Load data
. *-------------------------------
. 
. * Load data
. cd "$workpath"
{res}H:\Dropbox\work\replicationpack
{txt}
{com}. use data\raw\mdcourts\crimcases_raw.dta, replace
{txt}
{com}. 
. * Keep relevant variables
. keep casenumber Address1_ City1_ State1_ ZipCode1_ DateINIT1 DefendantName1_
{txt}
{com}. 
. * Drop duplicates and compress
. duplicates drop

{p 0 4}{txt}Duplicates in terms of {txt} all variables{p_end}

(1 observation deleted)

{com}. compress
{txt}  (0 bytes saved)

{com}. 
. *-------------------------------
. ** Parse defendant name
. *-------------------------------
. 
. * Parse name
. replace DefendantName1_ = upper(trim(itrim(DefendantName1_)))
{txt}(4,322 real changes made)

{com}. gen lastname_def = regexs(1) if regexm(DefendantName1_,"(.*)(,)(.*)")
{txt}
{com}. gen firstname_def = regexs(3) if regexm(DefendantName1_,"(.*)(,)(.*)")
{txt}(1 missing value generated)

{com}. replace firstname_def = word(firstname_def,1) // Only take first part of first name
{txt}(1,204,628 real changes made)

{com}. 
. * Clean names
. foreach var of varlist firstname_def lastname_def {c -(}
{txt}  2{com}.         replace `var'=trim(itrim(`var'))
{txt}  3{com}.         replace `var'=subinstr(`var',".","",.)
{txt}  4{com}.         replace `var'=regexr(`var'," AKA .*$","")
{txt}  5{com}.         replace `var'=regexr(`var',", .*$","")
{txt}  6{com}.         replace `var'=regexr(`var'," JR$| I$| II$| III$| IV$| V$","")
{txt}  7{com}. {c )-}
{txt}(0 real changes made)
(62 real changes made)
(0 real changes made)
(0 real changes made)
(0 real changes made)
(7 real changes made)
(108 real changes made)
(1 real change made)
(1 real change made)
(313 real changes made)

{com}. drop DefendantName1_
{txt}
{com}. 
. *-------------------------------
. * Parse address
. *-------------------------------
. 
. * Parse address name
. replace Address1_ = subinstr(Address1_,".","",.)
{txt}(99,681 real changes made)

{com}. replace Address1_ = trim(itrim(Address1_))
{txt}(134 real changes made)

{com}. 
. * Drop invalid addresses
. drop if inlist(Address1_, "MAILING ADDRESS", "HELPING UP MISSION", "UNKNOWN", "BCDC", "UNK", "RCI", "MCCF") 
{txt}(11,310 observations deleted)

{com}. drop if regexm(Address1_,"HOMELESS|NO FIXED|RESCUE MISSION|CODE BLUE SHELTER")
{txt}(7,192 observations deleted)

{com}. drop if missing(Address1_)
{txt}(153 observations deleted)

{com}. drop if regexm(Address1_,"APPROX AGE")
{txt}(643 observations deleted)

{com}. drop if regexm(Address1_,"APPROX [0-9]")
{txt}(399 observations deleted)

{com}. drop if regexm(Address1_,"^AKA ")
{txt}(13,062 observations deleted)

{com}. drop if regexm(Address1_,"^AKA:")
{txt}(16,210 observations deleted)

{com}. drop if regexm(Address1_,"^A/K/A ")
{txt}(258 observations deleted)

{com}. drop if regexm(Address1_,"^A/K/A:")
{txt}(1,104 observations deleted)

{com}. drop if regexm(Address1_,"UNKNOWN")
{txt}(115 observations deleted)

{com}. drop if regexm(Address1_,"^CURRENTLY")
{txt}(65 observations deleted)

{com}. drop if regexm(Address1_,"^APT ")
{txt}(2,246 observations deleted)

{com}. drop if regexm(Address1_,"MAILING ADDRESS")
{txt}(489 observations deleted)

{com}. drop if !inlist(State1_,"","MD")
{txt}(87,560 observations deleted)

{com}. 
. * Trim floor
. replace Address1_ = regexr(Address1_," [A-Z]+ FL$","")
{txt}(129 real changes made)

{com}. replace Address1_ = regexr(Address1_," [A-Z]+ FLR$","")
{txt}(27 real changes made)

{com}. replace Address1_ = regexr(Address1_," [0-9][A-Z]+ FL$","")
{txt}(5,704 real changes made)

{com}. replace Address1_ = regexr(Address1_," [0-9][A-Z]+ FLR$","")
{txt}(1,027 real changes made)

{com}. replace Address1_ = regexr(Address1_," [A-Z]+ FLOOR$","")
{txt}(76 real changes made)

{com}. replace Address1_ = regexr(Address1_," [0-9][A-Z]+ FLOOR$","")
{txt}(1,920 real changes made)

{com}. 
. * Start address parse
. gen temp = Address1_
{txt}
{com}. rename City1_ city
{res}{txt}
{com}. rename State1_ state
{res}{txt}
{com}. 
. * Clean city
. replace city = trim(itrim(city))
{txt}(0 real changes made)

{com}. replace city = regexr(city,"^`","")
{txt}(6 real changes made)

{com}. replace city = regexr(city,"^]","")
{txt}(5 real changes made)

{com}. replace city = regexr(city,",$","")
{txt}(228 real changes made)

{com}. replace city = regexr(city,"\.$","")
{txt}(177 real changes made)

{com}. replace city = regexr(city,"`$","")
{txt}(23 real changes made)

{com}. replace city = regexr(city,", MD$","")
{txt}(2 real changes made)

{com}. replace city = regexr(city," MD$","")
{txt}(14 real changes made)

{com}. replace city = regexr(city,"^[\]","")
{txt}(23 real changes made)

{com}. replace city = trim(itrim(city))
{txt}(5 real changes made)

{com}. 
. * Flag apartment without unit#
. 
. replace temp = regexr(temp," STAPT$"," ST")
{txt}(2 real changes made)

{com}. replace temp = regexr(temp," APT$","")
{txt}(1,594 real changes made)

{com}. 
. * Spacing after comma
. replace temp = subinstr(temp,",", ", ",.)
{txt}variable {bf}temp{sf} was {bf}{res}str30{sf}{txt} now {bf}{res}str32{sf}
{txt}(54,014 real changes made)

{com}. replace temp = trim(itrim(temp))
{txt}(53,585 real changes made)

{com}. 
. * Parse apartment number
. gen aptno = ""
{txt}(1,063,823 missing values generated)

{com}. replace aptno = regexs(1) if regexm(temp," APT (.+$)")
{txt}variable {bf}aptno{sf} was {bf}{res}str1{sf}{txt} now {bf}{res}str20{sf}
{txt}(79,851 real changes made)

{com}. replace temp = regexr(temp," APT .+$","")
{txt}(79,851 real changes made)

{com}. replace aptno = regexs(1) if regexm(temp," APT#(.+)")
{txt}(2,445 real changes made)

{com}. replace temp = regexr(temp," APT#.+","")
{txt}(2,445 real changes made)

{com}. replace aptno = regexs(1) if regexm(temp," APT-(.+$)")
{txt}(998 real changes made)

{com}. replace temp = regexr(temp," APT-.+$","")
{txt}(998 real changes made)

{com}. replace aptno = regexs(1) if regexm(temp," APT([A-Z]$)")
{txt}(670 real changes made)

{com}. replace temp = regexr(temp," APT[A-Z]$","")
{txt}(670 real changes made)

{com}. replace aptno = regexs(1) if regexm(temp," APT([0-9]$)")
{txt}(764 real changes made)

{com}. replace temp = regexr(temp," APT[0-9]$","")
{txt}(764 real changes made)

{com}. replace aptno = regexs(1) if regexm(temp," \#([0-9]+$)")
{txt}(62,554 real changes made)

{com}. replace temp = regexr(temp," \#[0-9]+$","")
{txt}(62,554 real changes made)

{com}. replace aptno = regexs(1) if regexm(temp," \#(.+$)")
{txt}variable {bf}aptno{sf} was {bf}{res}str20{sf}{txt} now {bf}{res}str25{sf}
{txt}(15,324 real changes made)

{com}. replace temp = regexr(temp," \#.+$","")
{txt}(15,324 real changes made)

{com}. replace aptno = regexs(1) if regexm(temp,"\#(.+$)")
{txt}variable {bf}aptno{sf} was {bf}{res}str25{sf}{txt} now {bf}{res}str29{sf}
{txt}(1,458 real changes made)

{com}. replace temp = regexr(temp,"\#.+$","")
{txt}(1,458 real changes made)

{com}. replace aptno = regexs(1) if regexm(temp," STAPT (.*$)")
{txt}(60 real changes made)

{com}. replace temp = regexr(temp," STAPT .*$"," ST")
{txt}(60 real changes made)

{com}. replace aptno = regexs(1) if regexm(temp," UNIT (.*$)")
{txt}(690 real changes made)

{com}. replace temp = regexr(temp," UNIT .*$"," ST")
{txt}(690 real changes made)

{com}. replace temp = trim(itrim(temp))
{txt}(0 real changes made)

{com}. 
. * Clean apartment number
. replace aptno = subinstr(aptno," ","",.)
{txt}(5,818 real changes made)

{com}. replace aptno = subinstr(aptno,"-","",.)
{txt}(4,254 real changes made)

{com}. replace aptno = subinstr(aptno,"#","",.)
{txt}(3,566 real changes made)

{com}. 
. * Apartment dummy
. gen isapt = temp != Address1_
{txt}
{com}. 
. * Get rid of everything after comma
. replace temp = regexr(temp,", .*$","")
{txt}(28,311 real changes made)

{com}. 
. * Street direction
. replace temp = subinstr(temp,",","",.)
{txt}(25,681 real changes made)

{com}. gen stdir = regexs(1) if regexm(temp,"( SE$| SW$| NE$| NW$| S$| N$| E$| W$)")
{txt}(1,062,295 missing values generated)

{com}. replace stdir = trim(itrim(stdir))
{txt}(1,528 real changes made)

{com}. replace temp = regexr(temp,"( SE$| SW$| NE$| NW$| S$| N$| E$| W$)","")
{txt}(1,528 real changes made)

{com}. replace temp = trim(itrim(temp))
{txt}(164 real changes made)

{com}. 
. * Street type
. replace temp = regexr(temp," ROAD$", " RD")
{txt}(35,625 real changes made)

{com}. replace temp = regexr(temp," DRIVE$", " DR")
{txt}(31,921 real changes made)

{com}. replace temp = regexr(temp," COURT$", " CT")
{txt}(18,056 real changes made)

{com}. replace temp = regexr(temp," AVENUE$", " AVE")
{txt}(33,061 real changes made)

{com}. replace temp = regexr(temp," AV$", " AVE")
{txt}(80,781 real changes made)

{com}. replace temp = regexr(temp," AVE`$", " AVE")
{txt}(9 real changes made)

{com}. replace temp = regexr(temp," AAVE$", " AVE")
{txt}(11 real changes made)

{com}. replace temp = regexr(temp," AE$", " AVE")
{txt}(19 real changes made)

{com}. replace temp = regexr(temp," STREET$", " ST")
{txt}(58,804 real changes made)

{com}. replace temp = regexr(temp," LANE$", " LN")
{txt}(19,737 real changes made)

{com}. replace temp = regexr(temp," PLACE$", " PL")
{txt}(10,587 real changes made)

{com}. replace temp = regexr(temp," CIRCLE$", " CIR")
{txt}(5,579 real changes made)

{com}. replace temp = regexr(temp," CR$", " CIR")
{txt}(3,646 real changes made)

{com}. replace temp = regexr(temp," TERRACE$", " TER")
{txt}(5,100 real changes made)

{com}. replace temp = regexr(temp," TR$", " TER")
{txt}(1,954 real changes made)

{com}. replace temp = regexr(temp," TERR$", " TER")
{txt}(4,183 real changes made)

{com}. replace temp = regexr(temp," BOULEVARD$", " BLVD")
{txt}(150 real changes made)

{com}. replace temp = regexr(temp," BV$", " BLVD")
{txt}(1,489 real changes made)

{com}. replace temp = regexr(temp," HIGHWAY$", " HWY")
{txt}(619 real changes made)

{com}. replace temp = regexr(temp," HW$", " HWY")
{txt}(352 real changes made)

{com}. replace temp = regexr(temp," WY$", " WAY")
{txt}(2,755 real changes made)

{com}. replace temp = regexr(temp," PARKWAY$", " PKWY")
{txt}(2,051 real changes made)

{com}. replace temp = regexr(temp," PW$", " PKWY")
{txt}(798 real changes made)

{com}. replace temp = regexr(temp," TRAIL$", " TRL")
{txt}(441 real changes made)

{com}. gen sttype = regexs(1) if regexm(temp,"( RD$| DR$| CT$| AVE$| ST$| LN$| WAY$| PL$| CIR$| TER$| BLVD$| HWY$| PKWY$| PIKE$| TRL$)")
{txt}(73,185 missing values generated)

{com}. replace sttype = trim(itrim(sttype))
{txt}(990,638 real changes made)

{com}. replace temp = regexr(temp,"( RD$| DR$| CT$| AVE$| ST$| LN$| WAY$| PL$| CIR$| TER$| BLVD$| HWY$| PKWY$| PIKE$| TRL$)","")
{txt}(990,638 real changes made)

{com}. 
. * Street number
. gen stno = regexs(1) if regexm(temp,"^([0-9]+[-]?[A-Z]?) ")
{txt}(39,087 missing values generated)

{com}. replace stno = trim(itrim(stno))
{txt}(0 real changes made)

{com}. replace temp = regexr(temp,"^([0-9]+[-]?[A-Z]?) ","")
{txt}(1,024,736 real changes made)

{com}. 
. * Street name
. rename temp stname
{res}{txt}
{com}. replace stname = regexr(stname," SQUARE$"," SQ")
{txt}(969 real changes made)

{com}. replace stname = regexr(stname," CRESCENT$"," CRES")
{txt}(21 real changes made)

{com}. replace stname = regexr(stname," ALLEY$"," ALY")
{txt}(24 real changes made)

{com}. replace stname = regexr(stname," VIEW$"," VW")
{txt}(1,723 real changes made)

{com}. replace stname = regexr(stname," HEIGHTS$"," HTS")
{txt}(5,699 real changes made)

{com}. replace stname = regexr(stname," RIDGE$"," RDG")
{txt}(2,502 real changes made)

{com}. replace stname = regexr(stname," KNOLL$"," KNL")
{txt}(219 real changes made)

{com}. replace stname = regexr(stname," GATEWAY$"," GTWY")
{txt}(16 real changes made)

{com}. replace stname = regexr(stname," COVE$"," CV")
{txt}(184 real changes made)

{com}. replace stname = regexr(stname," PLAZA$"," PLZ")
{txt}(34 real changes made)

{com}. 
. * Clean zipcode
. tostring ZipCode1_, gen(zipcode)
{txt}zipcode generated as {res:str5}

{com}. 
. * Full address
. gen address = stno + " " + stname + " " + sttype + " " + stdir
{txt}
{com}. replace address = trim(itrim(address))
{txt}(1,062,535 real changes made)

{com}. 
. * If address is apartment in one case, make it the same in all cases
. egen temp = max(isapt), by(address zipcode)
{txt}
{com}. replace isapt = temp
{txt}(59,719 real changes made)

{com}. drop temp
{txt}
{com}. 
. *-----------------------
. * Save file
. *-----------------------
. 
. * Save
. compress
  {txt}variable {bf}isapt{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}lastname_def{sf} was {bf}{res}str28{sf}{txt} now {bf}{res}str22{sf}
  {txt}variable {bf}firstname_def{sf} was {bf}{res}str26{sf}{txt} now {bf}{res}str18{sf}
  {txt}variable {bf}stname{sf} was {bf}{res}str32{sf}{txt} now {bf}{res}str30{sf}
  {txt}variable {bf}aptno{sf} was {bf}{res}str29{sf}{txt} now {bf}{res}str25{sf}
  {txt}variable {bf}stdir{sf} was {bf}{res}str3{sf}{txt} now {bf}{res}str2{sf}
  {txt}variable {bf}sttype{sf} was {bf}{res}str5{sf}{txt} now {bf}{res}str4{sf}
  {txt}variable {bf}address{sf} was {bf}{res}str33{sf}{txt} now {bf}{res}str31{sf}
{txt}  (28,723,221 bytes saved)

{com}. save data\crimcases_address.dta, replace
{txt}file data\crimcases_address.dta saved

{com}. 
. **************************** Clean addresses in civil courts data *********************************
. 
. *--------------------------------------------------
. * Load data
. *--------------------------------------------------
. 
. * Load data
. cd "$workpath"
{res}H:\Dropbox\work\replicationpack
{txt}
{com}. use data\raw\mdcourts\civilcases_raw.dta, replace
{txt}
{com}. 
. * Flag case type
. gen foreclosure = regexm(case_type,"^FORECLOSURE$|FORECLOSURE - RESIDENTIAL")
{txt}
{com}. gen ftpr = regexm(case_type,"FAILURE TO PAY RENT")
{txt}
{com}. gen lien = regexm(case_type,"LIEN")
{txt}
{com}. gen judgment = regexm(case_type,"JUDGMENT")
{txt}
{com}. gen smallclaims = regexm(case_type,"SMALL CLAIMS")
{txt}
{com}. 
. * Only keep relevant cases
. keep if foreclosure|ftpr|lien|judgment|smallclaims
{txt}(4,119,982 observations deleted)

{com}. 
. * Keep relevant variables
. keep casenumber case_type case_date zip address_to_use foreclosure lien name
{txt}
{com}. 
. *--------------------------------------------------
. * Parse defendant name
. *--------------------------------------------------
. 
. * Parse name
. replace name = upper(trim(itrim(name)))
{txt}(868,445 real changes made)

{com}. gen lname_civdef = regexs(1) if regexm(name,"(.*)(,)(.*)")
{txt}(114,905 missing values generated)

{com}. gen fname_civdef = regexs(3) if regexm(name,"(.*)(,)(.*)")
{txt}(115,014 missing values generated)

{com}. replace fname_civdef = word(fname_civdef,1) // Only take first part of first name
{txt}(2,056,707 real changes made)

{com}. 
. * Clean names
. foreach var of varlist fname_civdef lname_civdef {c -(}
{txt}  2{com}.         replace `var'=trim(itrim(`var'))
{txt}  3{com}.         replace `var'=subinstr(`var',".","",.)
{txt}  4{com}.         replace `var'=regexr(`var'," AKA .*$","")
{txt}  5{com}.         replace `var'=regexr(`var',", .*$","")
{txt}  6{com}.         replace `var'=regexr(`var'," JR$| I$| II$| III$| IV$| V$","")
{txt}  7{com}. {c )-}
{txt}(0 real changes made)
(3,252 real changes made)
(0 real changes made)
(0 real changes made)
(0 real changes made)
(326 real changes made)
(2,891 real changes made)
(388 real changes made)
(6,224 real changes made)
(1,861 real changes made)

{com}. drop name
{txt}
{com}. 
. *--------------------------------------------------
. * Parse address
. *--------------------------------------------------
. 
. * Convert ZIP code
. tostring zip, replace
{txt}zip was {res:double} now {res:str11}

{com}. rename zip zipcode_civil
{res}{txt}
{com}. 
. * Parse address name
. replace address_to_use = subinstr(address_to_use,".","",.)
{txt}(0 real changes made)

{com}. replace address_to_use = trim(itrim(address_to_use))
{txt}(38,758 real changes made)

{com}. replace address_to_use = upper(address_to_use)
{txt}(0 real changes made)

{com}. drop if missing(address_to_use)
{txt}(0 observations deleted)

{com}. 
. * Trim floor
. replace address_to_use = regexr(address_to_use," [A-Z]+ FL$","")
{txt}(20 real changes made)

{com}. replace address_to_use = regexr(address_to_use," [A-Z]+ FLR$","")
{txt}(5 real changes made)

{com}. replace address_to_use = regexr(address_to_use," [0-9][A-Z]+ FL$","")
{txt}(322 real changes made)

{com}. replace address_to_use = regexr(address_to_use," [0-9][A-Z]+ FLR$","")
{txt}(227 real changes made)

{com}. replace address_to_use = regexr(address_to_use," [A-Z]+ FLOOR$","")
{txt}(70 real changes made)

{com}. replace address_to_use = regexr(address_to_use," [0-9][A-Z]+ FLOOR$","")
{txt}(744 real changes made)

{com}. 
. * Start address parse
. gen temp = address_to_use
{txt}
{com}. 
. * Flag apartment without unit#
. replace temp = regexr(temp," STAPT$"," ST")
{txt}(1 real change made)

{com}. replace temp = regexr(temp," APT$","")
{txt}(431 real changes made)

{com}. 
. * Spacing after comma
. replace temp = subinstr(temp,",", ", ",.)
{txt}(0 real changes made)

{com}. replace temp = trim(itrim(temp))
{txt}(0 real changes made)

{com}. 
. * Parse apartment number
. gen aptno = ""
{txt}(2,171,778 missing values generated)

{com}. replace aptno = regexs(1) if regexm(temp," APARTMENT (.+$)")
{txt}variable {bf}aptno{sf} was {bf}{res}str1{sf}{txt} now {bf}{res}str18{sf}
{txt}(1,199 real changes made)

{com}. replace temp = regexr(temp," APARTMENT .+$","")
{txt}(1,199 real changes made)

{com}. replace aptno = regexs(1) if regexm(temp," APT (.+$)")
{txt}variable {bf}aptno{sf} was {bf}{res}str18{sf}{txt} now {bf}{res}str34{sf}
{txt}(199,702 real changes made)

{com}. replace temp = regexr(temp," APT .+$","")
{txt}(199,702 real changes made)

{com}. replace aptno = regexs(1) if regexm(temp," SUITE (.+$)")
{txt}variable {bf}aptno{sf} was {bf}{res}str34{sf}{txt} now {bf}{res}str64{sf}
{txt}(5,717 real changes made)

{com}. replace temp = regexr(temp," SUITE .+$","")
{txt}(5,717 real changes made)

{com}. replace aptno = regexs(1) if regexm(temp," APT#(.+)")
{txt}(0 real changes made)

{com}. replace temp = regexr(temp," APT#.+","")
{txt}(0 real changes made)

{com}. replace aptno = regexs(1) if regexm(temp," APT-(.+$)")
{txt}(0 real changes made)

{com}. replace temp = regexr(temp," APT-.+$","")
{txt}(0 real changes made)

{com}. replace aptno = regexs(1) if regexm(temp," APT([A-Z]$)")
{txt}(2,677 real changes made)

{com}. replace temp = regexr(temp," APT[A-Z]$","")
{txt}(2,677 real changes made)

{com}. replace aptno = regexs(1) if regexm(temp," APT([0-9]$)")
{txt}(1,135 real changes made)

{com}. replace temp = regexr(temp," APT[0-9]$","")
{txt}(1,135 real changes made)

{com}. replace aptno = regexs(1) if regexm(temp," \#([0-9]+$)")
{txt}(0 real changes made)

{com}. replace temp = regexr(temp," \#[0-9]+$","")
{txt}(0 real changes made)

{com}. replace aptno = regexs(1) if regexm(temp," \#(.+$)")
{txt}(0 real changes made)

{com}. replace temp = regexr(temp," \#.+$","")
{txt}(0 real changes made)

{com}. replace aptno = regexs(1) if regexm(temp,"\#(.+$)")
{txt}(0 real changes made)

{com}. replace temp = regexr(temp,"\#.+$","")
{txt}(0 real changes made)

{com}. replace aptno = regexs(1) if regexm(temp," STAPT (.*$)")
{txt}(7 real changes made)

{com}. replace temp = regexr(temp," STAPT .*$"," ST")
{txt}(7 real changes made)

{com}. replace aptno = regexs(1) if regexm(temp," UNIT (.*$)")
{txt}(29,993 real changes made)

{com}. replace temp = regexr(temp," UNIT .*$"," ST")
{txt}(29,994 real changes made)

{com}. replace aptno = regexs(1) if regexm(temp," ([A-Z][0-9]+)$")
{txt}(31,624 real changes made)

{com}. replace temp = regexr(temp," [A-Z][0-9]+$","")
{txt}(31,625 real changes made)

{com}. replace aptno = regexs(1) if regexm(temp," ([0-9]+[A-Z])$")
{txt}(37,738 real changes made)

{com}. replace temp = regexr(temp," [0-9]+[A-Z]$","")
{txt}(37,739 real changes made)

{com}. replace aptno = regexs(1) if regexm(temp," ([0-9]+)$")
{txt}(236,397 real changes made)

{com}. replace temp = regexr(temp," [0-9]+$","")
{txt}(236,402 real changes made)

{com}. replace temp = regexr(temp," [0-9]+$","")
{txt}(2,663 real changes made)

{com}. replace aptno = regexs(1) + aptno if regexm(temp," ([A-D])$")
{txt}(53,969 real changes made)

{com}. replace temp = regexr(temp," [A-D]$","")
{txt}(53,969 real changes made)

{com}. replace temp = regexr(temp," [A-D]$","")
{txt}(59 real changes made)

{com}. replace temp = trim(itrim(temp))
{txt}(0 real changes made)

{com}. 
. * Clean apartment number
. replace aptno = subinstr(aptno," ","",.)
{txt}(8,709 real changes made)

{com}. replace aptno = subinstr(aptno,"-","",.)
{txt}(0 real changes made)

{com}. replace aptno = subinstr(aptno,"#","",.)
{txt}(0 real changes made)

{com}. 
. * Apartment dummy
. gen isapt = temp != address_to_use
{txt}
{com}. 
. * Get rid of everything after comma
. replace temp = regexr(temp,", .*$","")
{txt}(0 real changes made)

{com}. 
. * Street direction
. replace temp = subinstr(temp,",","",.)
{txt}(0 real changes made)

{com}. gen stdir = regexs(1) if regexm(temp,"( SE$| SW$| NE$| NW$| S$| N$| E$| W$)")
{txt}(2,144,892 missing values generated)

{com}. replace stdir = trim(itrim(stdir))
{txt}(26,886 real changes made)

{com}. replace temp = regexr(temp,"( SE$| SW$| NE$| NW$| S$| N$| E$| W$)","")
{txt}(26,886 real changes made)

{com}. replace temp = trim(itrim(temp))
{txt}(0 real changes made)

{com}. 
. * Street type
. replace temp = regexr(temp," ROAD$", " RD")
{txt}(234,130 real changes made)

{com}. replace temp = regexr(temp," DRIVE$", " DR")
{txt}(175,288 real changes made)

{com}. replace temp = regexr(temp," COURT$", " CT")
{txt}(160,339 real changes made)

{com}. replace temp = regexr(temp," AVENUE$", " AVE")
{txt}(89,049 real changes made)

{com}. replace temp = regexr(temp," AV$", " AVE")
{txt}(245 real changes made)

{com}. replace temp = regexr(temp," AVE`$", " AVE")
{txt}(2 real changes made)

{com}. replace temp = regexr(temp," AAVE$", " AVE")
{txt}(2 real changes made)

{com}. replace temp = regexr(temp," AE$", " AVE")
{txt}(20 real changes made)

{com}. replace temp = regexr(temp," STREET$", " ST")
{txt}(92,238 real changes made)

{com}. replace temp = regexr(temp," LANE$", " LN")
{txt}(93,109 real changes made)

{com}. replace temp = regexr(temp," PLACE$", " PL")
{txt}(41,509 real changes made)

{com}. replace temp = regexr(temp," CIRCLE$", " CIR")
{txt}(70,019 real changes made)

{com}. replace temp = regexr(temp," CR$", " CIR")
{txt}(3,393 real changes made)

{com}. replace temp = regexr(temp," TERRACE$", " TER")
{txt}(17,411 real changes made)

{com}. replace temp = regexr(temp," TR$", " TER")
{txt}(97 real changes made)

{com}. replace temp = regexr(temp," TERR$", " TER")
{txt}(4,606 real changes made)

{com}. replace temp = regexr(temp," BOULEVARD$", " BLVD")
{txt}(2,973 real changes made)

{com}. replace temp = regexr(temp," BV$", " BLVD")
{txt}(15 real changes made)

{com}. replace temp = regexr(temp," HIGHWAY$", " HWY")
{txt}(3,312 real changes made)

{com}. replace temp = regexr(temp," HW$", " HWY")
{txt}(15 real changes made)

{com}. replace temp = regexr(temp," WY$", " WAY")
{txt}(1,226 real changes made)

{com}. replace temp = regexr(temp," PARKWAY$", " PKWY")
{txt}(5,640 real changes made)

{com}. replace temp = regexr(temp," PW$", " PKWY")
{txt}(3 real changes made)

{com}. replace temp = regexr(temp," TRAIL$", " TRL")
{txt}(3,006 real changes made)

{com}. gen sttype = regexs(1) if regexm(temp,"( RD$| DR$| CT$| AVE$| ST$| LN$| WAY$| PL$| CIR$| TER$| BLVD$| HWY$| PKWY$| PIKE$| TRL$)")
{txt}(165,430 missing values generated)

{com}. replace sttype = trim(itrim(sttype))
{txt}(2,006,348 real changes made)

{com}. replace temp = regexr(temp,"( RD$| DR$| CT$| AVE$| ST$| LN$| WAY$| PL$| CIR$| TER$| BLVD$| HWY$| PKWY$| PIKE$| TRL$)","")
{txt}(2,006,348 real changes made)

{com}. 
. * Street number
. gen stno = regexs(1) if regexm(temp,"^([0-9]+[-]?[A-Z]?) ")
{txt}(22,242 missing values generated)

{com}. replace stno = trim(itrim(stno))
{txt}(0 real changes made)

{com}. replace temp = regexr(temp,"^([0-9]+[-]?[A-Z]?) ","")
{txt}(2,149,536 real changes made)

{com}. 
. * Street name
. rename temp stname
{res}{txt}
{com}. replace stname = regexr(stname," SQUARE$"," SQ")
{txt}(7,517 real changes made)

{com}. replace stname = regexr(stname," CRESCENT$"," CRES")
{txt}(191 real changes made)

{com}. replace stname = regexr(stname," ALLEY$"," ALY")
{txt}(449 real changes made)

{com}. replace stname = regexr(stname," VIEW$"," VW")
{txt}(6,117 real changes made)

{com}. replace stname = regexr(stname," HEIGHTS$"," HTS")
{txt}(3,690 real changes made)

{com}. replace stname = regexr(stname," RIDGE$"," RDG")
{txt}(25,568 real changes made)

{com}. replace stname = regexr(stname," KNOLL$"," KNL")
{txt}(2,223 real changes made)

{com}. replace stname = regexr(stname," GATEWAY$"," GTWY")
{txt}(1,036 real changes made)

{com}. replace stname = regexr(stname," COVE$"," CV")
{txt}(3,640 real changes made)

{com}. replace stname = regexr(stname," PLAZA$"," PLZ")
{txt}(223 real changes made)

{com}. 
. * Full address
. gen address = stno + " " + stname + " " + sttype + " " + stdir
{txt}
{com}. replace address = trim(itrim(address))
{txt}(2,146,890 real changes made)

{com}. 
. * If address is apartment in one case, make it the same in all cases in same address
. egen temp = max(isapt), by(address zipcode)
{txt}
{com}. replace isapt = temp
{txt}(44,500 real changes made)

{com}. drop temp
{txt}
{com}. 
. * Rename
. rename casenumber civcasenum
{res}{txt}
{com}. 
. *--------------------------------------------------
. * Save data
. *--------------------------------------------------
. 
. * Save
. compress
  {txt}variable {bf}foreclosure{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}lien{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}isapt{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}address_to_use{sf} was {bf}{res}str118{sf}{txt} now {bf}{res}str98{sf}
  {txt}variable {bf}lname_civdef{sf} was {bf}{res}str101{sf}{txt} now {bf}{res}str73{sf}
  {txt}variable {bf}fname_civdef{sf} was {bf}{res}str78{sf}{txt} now {bf}{res}str20{sf}
  {txt}variable {bf}stname{sf} was {bf}{res}str98{sf}{txt} now {bf}{res}str91{sf}
  {txt}variable {bf}aptno{sf} was {bf}{res}str64{sf}{txt} now {bf}{res}str57{sf}
  {txt}variable {bf}stdir{sf} was {bf}{res}str3{sf}{txt} now {bf}{res}str2{sf}
  {txt}variable {bf}sttype{sf} was {bf}{res}str5{sf}{txt} now {bf}{res}str4{sf}
  {txt}variable {bf}address{sf} was {bf}{res}str99{sf}{txt} now {bf}{res}str97{sf}
{txt}  (288,846,474 bytes saved)

{com}. save data\civilcases_address.dta, replace
{txt}file data\civilcases_address.dta saved

{com}. 
. ************************** Link civil to criminal cases by address ************************
. 
. *--------------------------------------
. * Merge civil and criminal courts data
. *--------------------------------------
. 
. * Load criminal case addresses
. cd "$workpath"
{res}H:\Dropbox\work\replicationpack
{txt}
{com}. use data\crimcases_address.dta, clear
{txt}
{com}. drop if missing(address)
{txt}(138 observations deleted)

{com}. 
. * Rename variables
. foreach var of varlist zipcode city state aptno isapt {c -(}
{txt}  2{com}.         rename `var' `var'_case
{txt}  3{com}. {c )-}
{res}{txt}
{com}. 
. * Join to civil case addresses
. joinby address using data\civilcases_address.dta, update unm(m)
{txt}
{com}. keep if _m == 3
{txt}(747,327 observations deleted)

{com}. drop _merge
{txt}
{com}. 
. * Rename variables
. rename foreclosure civfcl
{res}{txt}
{com}. 
. *--------------------------------------------------
. * Basic name matching
. *--------------------------------------------------
. 
. /* Name match dummy
> br firstname_def fname_civdef lastname_def lname_civdef
> */
. gen namematch_full = lastname_def == lname_civdef & !missing(lastname_def) & firstname_def == fname_civdef & !missing(firstname_def)
{txt}
{com}. gen namematch_last = lastname_def == lname_civdef & !missing(lastname_def)
{txt}
{com}. 
. * Defendant name match
. gen defnamematch_full = namematch_full
{txt}
{com}. gen defnamematch_last = namematch_last
{txt}
{com}. 
. * Fullnames
. gen fullname_def = firstname_def + " " + lastname_def
{txt}
{com}. gen fullname_civdef = fname_civdef + " " + lname_civdef
{txt}
{com}. replace fullname_def = trim(itrim(fullname_def))
{txt}(0 real changes made)

{com}. replace fullname_civdef = trim(itrim(fullname_civdef))
{txt}(113,926 real changes made)

{com}. 
. *--------------------------------------------------
. * Family name matching
. *--------------------------------------------------
. 
. * Debtor ID
. egen civdefid=seq(), by(casenumber case_type case_date)
{txt}
{com}. duplicates report casenumber case_type case_date civdefid

{p 0 4}{txt}Duplicates in terms of {res} casenumber case_type case_date civdefid{p_end}

{txt}{hline 10}{c TT}{hline 27}
   copies {c |} observations       surplus
{hline 10}{c +}{hline 27}
        1 {c |}      {res}1981552             0
{txt}{hline 10}{c BT}{hline 27}

{com}. 
. * Merge with family names
. cap drop _merge
{txt}
{com}. joinby casenumber case_type case_date using data\raw\mdcourts\relnames_civcases.dta, update unm(m)
{txt}
{com}. drop _merge
{txt}
{com}. rename relative fname_rel
{res}{txt}
{com}. replace fname_rel = trim(itrim(upper(fname_rel)))
{txt}(6,280,804 real changes made)

{com}. drop n_relative source
{txt}
{com}. 
. * Get rid of special characters
. replace fname_rel = subinstr(fname_rel,",","",.)
{txt}(0 real changes made)

{com}. replace fname_rel = subinstr(fname_rel,"/","",.)
{txt}(0 real changes made)

{com}. replace fname_rel = subinstr(fname_rel,".","",.)
{txt}(0 real changes made)

{com}. replace fname_rel = subinstr(fname_rel,`"""',"",.)
{txt}(20 real changes made)

{com}. 
. * Parse family name
. gen firstname_rel = word(fname_rel,1)
{txt}(1,849,658 missing values generated)

{com}. gen lastname_rel = word(fname_rel,2)
{txt}(1,849,658 missing values generated)

{com}. 
. * Family name match
. gen fammatch_full = lastname_def == lastname_rel & !missing(lastname_def) & firstname_def == firstname_rel & !missing(firstname_def)
{txt}
{com}. gen fammatch_last = lastname_def == lastname_rel & !missing(lastname_def)
{txt}
{com}. 
. *--------------------------------------------------
. * Collapse by civdef-defendant pair
. *--------------------------------------------------
. 
. * Collapse
. drop *name_rel
{txt}
{com}. foreach var of varlist fammatch_full fammatch_last {c -(}
{txt}  2{com}.         egen temp = max(`var'), by(casenumber case_type case_date civdefid)
{txt}  3{com}.         replace `var'=temp
{txt}  4{com}.         drop temp
{txt}  5{com}. {c )-}
{txt}(1,602,159 real changes made)
(1,540,885 real changes made)

{com}. duplicates drop

{p 0 4}{txt}Duplicates in terms of {txt} all variables{p_end}

(6,148,910 observations deleted)

{com}. duplicates report casenumber case_type case_date civdefid

{p 0 4}{txt}Duplicates in terms of {res} casenumber case_type case_date civdefid{p_end}

{txt}{hline 10}{c TT}{hline 27}
   copies {c |} observations       surplus
{hline 10}{c +}{hline 27}
        1 {c |}      {res}1981552             0
{txt}{hline 10}{c BT}{hline 27}

{com}. 
. * Include name match
. replace namematch_full = 1 if fammatch_full==1
{txt}(23,902 real changes made)

{com}. replace namematch_last = 1 if fammatch_last==1
{txt}(14,032 real changes made)

{com}. 
. *--------------------------------------------------
. * Get defendant family members
. *--------------------------------------------------
. 
. * Merge with family names
. joinby casenumber using data\raw\mdcourts\relnames_defendants.dta, update unm(m)
{txt}
{com}. drop _merge
{txt}
{com}. rename relative fname_rel
{res}{txt}
{com}. replace fname_rel = trim(itrim(upper(fname_rel)))
{txt}(17,641,206 real changes made)

{com}. drop n_relative source
{txt}
{com}. 
. * Get rid of special characters
. replace fname_rel = subinstr(fname_rel,",","",.)
{txt}(0 real changes made)

{com}. replace fname_rel = subinstr(fname_rel,"/","",.)
{txt}(0 real changes made)

{com}. replace fname_rel = subinstr(fname_rel,".","",.)
{txt}(0 real changes made)

{com}. replace fname_rel = subinstr(fname_rel,`"""',"",.)
{txt}(5 real changes made)

{com}. 
. * Parse family name
. gen firstname_rel = word(fname_rel,1)
{txt}(1,545,754 missing values generated)

{com}. gen lastname_rel = word(fname_rel,2)
{txt}(1,545,754 missing values generated)

{com}. 
. * Family name match
. replace fammatch_full = 1 if lname_civdef == lastname_rel & !missing(lname_civdef) & fname_civdef == firstname_rel & !missing(fname_civdef)
{txt}(25,385 real changes made)

{com}. replace fammatch_last = 1 if lname_civdef == lastname_rel & !missing(lname_civdef)
{txt}(594,792 real changes made)

{com}. 
. *--------------------------------------------------
. * Collapse by civdef-defendant pair
. *--------------------------------------------------
. 
. * Collapse
. drop *name_rel
{txt}
{com}. foreach var of varlist fammatch_full fammatch_last {c -(}
{txt}  2{com}.         egen temp = max(`var'), by(casenumber case_type case_date civdefid)
{txt}  3{com}.         replace `var'=temp
{txt}  4{com}.         drop temp
{txt}  5{com}. {c )-}
{txt}(1,056,890 real changes made)
(1,588,910 real changes made)

{com}. duplicates drop

{p 0 4}{txt}Duplicates in terms of {txt} all variables{p_end}

(17,205,408 observations deleted)

{com}. duplicates report casenumber case_type case_date civdefid

{p 0 4}{txt}Duplicates in terms of {res} casenumber case_type case_date civdefid{p_end}

{txt}{hline 10}{c TT}{hline 27}
   copies {c |} observations       surplus
{hline 10}{c +}{hline 27}
        1 {c |}      {res}1981552             0
{txt}{hline 10}{c BT}{hline 27}

{com}. 
. * Include name match
. replace namematch_full = 1 if fammatch_full==1
{txt}(21,100 real changes made)

{com}. replace namematch_last = 1 if fammatch_last==1
{txt}(15,563 real changes made)

{com}. 
. *--------------------------------------------------
. * Filter based on zip and apartment matches
. *--------------------------------------------------
. 
. * Only keep if same zip code
. destring zipcode*, replace
{txt}zipcode_case: all characters numeric; {res}replaced {txt}as {res}long
{txt}(169 missing values generated)
{res}{txt}zipcode_civil: all characters numeric; {res}replaced {txt}as {res}double
{txt}
{com}. gen samezip = zipcode_civil == zipcode_case
{txt}
{com}. drop if !samezip
{txt}(277,679 observations deleted)

{com}. 
. * Keep only pairs whose apartment numbers agree (two missing values also compare equal); exactapt flags agreement on a non-missing apartment number
. gen sameapt = aptno == aptno_case
{txt}
{com}. gen isapt_civ = (isapt_case | isapt)
{txt}
{com}. gen exactapt = sameapt & !missing(aptno)
{txt}
{com}. drop if !sameapt
{txt}(1,082,280 observations deleted)

{com}. 
. *--------------------------------------------------
. * Date gap between hearing and transaction
. *--------------------------------------------------
. 
. * Get number of days between INIT and event
. foreach var of varlist civfcl lien {c -(}
{txt}  2{com}.         gen dates_init_`var' = case_date - DateINIT1 if `var' == 1
{txt}  3{com}. {c )-}
{txt}(403,910 missing values generated)
(327,656 missing values generated)

{com}. 
. * Get information on first event after case
. foreach var of varlist civfcl lien {c -(}
{txt}  2{com}.         egen post`var'date = min(dates_init_`var') if dates_init_`var'>0, by(casenumber)
{txt}  3{com}. {c )-}
{txt}(461,418 missing values generated)
(408,421 missing values generated)

{com}. 
. * Get information on last event before case
. foreach var of varlist civfcl lien {c -(}
{txt}  2{com}.         egen pre`var'date = max(dates_init_`var') if dates_init_`var'<0, by(casenumber)
{txt}  3{com}. {c )-}
{txt}(522,616 missing values generated)
(499,816 missing values generated)

{com}. 
. *--------------------------------------------------
. * Generate transaction dummies
. *--------------------------------------------------
. 
. * Event dummies - pre- and post-hearing dummies at various horizons (in days)
. foreach var of varlist civfcl lien {c -(}
{txt}  2{com}.         foreach days in 91 182 365 730 1105 {c -(}   
{txt}  3{com}.                 gen byte `var'_`days' = inrange(dates_init_`var',1,`days')
{txt}  4{com}.                 gen byte `var'_`days'pre = inrange(dates_init_`var',-`days',-1)         
{txt}  5{com}.                 
.                 gen byte `var'_nml_`days' = inrange(dates_init_`var',1,`days') & namematch_last == 1
{txt}  6{com}.                 gen byte `var'_nml_`days'pre = inrange(dates_init_`var',-`days',-1) & namematch_last == 1
{txt}  7{com}.                 
.                 gen byte `var'_nmf_`days' = inrange(dates_init_`var',1,`days') & namematch_full == 1
{txt}  8{com}.                 gen byte `var'_nmf_`days'pre = inrange(dates_init_`var',-`days',-1) & namematch_full == 1
{txt}  9{com}.                 
.                 gen byte `var'_fml_`days' = inrange(dates_init_`var',1,`days') & fammatch_last == 1
{txt} 10{com}.                 gen byte `var'_fml_`days'pre = inrange(dates_init_`var',-`days',-1) & fammatch_last == 1
{txt} 11{com}.                 
.                 gen byte `var'_fmf_`days' = inrange(dates_init_`var',1,`days') & fammatch_full == 1
{txt} 12{com}.                 gen byte `var'_fmf_`days'pre = inrange(dates_init_`var',-`days',-1) & fammatch_full == 1
{txt} 13{com}. 
.                 gen byte `var'_dml_`days' = inrange(dates_init_`var',1,`days') & defnamematch_last == 1
{txt} 14{com}.                 gen byte `var'_dml_`days'pre = inrange(dates_init_`var',-`days',-1) & defnamematch_last == 1
{txt} 15{com}.                 
.                 gen byte `var'_dmf_`days' = inrange(dates_init_`var',1,`days') & defnamematch_full == 1
{txt} 16{com}.                 gen byte `var'_dmf_`days'pre = inrange(dates_init_`var',-`days',-1) & defnamematch_full == 1
{txt} 17{com}. 
.         {c )-}
{txt} 18{com}. {c )-}
{txt}
{com}. 
. * Relabel day horizons as months/years in variable names
. rename *91* *3mos*
{res}{txt}
{com}. rename *182* *6mos*
{res}{txt}
{com}. rename *365* *1yr*
{res}{txt}
{com}. rename *730* *2yr*
{res}{txt}
{com}. rename *1105* *3yr*
{res}{txt}
{com}. 
. * If too many matches, then likely not the same unit in a larger apartment
. egen nummatches = count(DateINIT1), by(casenumber address)
{txt}
{com}. drop if nummatches > 30 & !exactapt
{txt}(84,996 observations deleted)

{com}. 
. 
. *--------------------------------------------------
. * Collapse and save
. *--------------------------------------------------
. 
. * Collapse
. collapse (max) civfcl*_* lien*_* isapt_civ post*date pre*date, by(casenumber)
{txt}
{com}. 
. * Save
. compress
  {txt}variable {bf}isapt_civ{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}postcivfcldate{sf} was {bf}{res}float{sf}{txt} now {bf}{res}int{sf}
  {txt}variable {bf}postliendate{sf} was {bf}{res}float{sf}{txt} now {bf}{res}int{sf}
  {txt}variable {bf}precivfcldate{sf} was {bf}{res}float{sf}{txt} now {bf}{res}int{sf}
  {txt}variable {bf}preliendate{sf} was {bf}{res}float{sf}{txt} now {bf}{res}int{sf}
{txt}  (2,329,261 bytes saved)

{com}. save data\cases_civlinked.dta, replace
{txt}file data\cases_civlinked.dta saved

{com}. 
. 
. ********************************* Clean and parse bankruptcy addresses ************************************/
. 
. *--------------------------------------------------
. * Load data and reshape
. *--------------------------------------------------
. 
. * Load data
. cd "$workpath"
{res}H:\Dropbox\work\replicationpack
{txt}
{com}. use data\raw\bankruptcy\gnw_bankruptcy_raw.dta, clear
{txt}
{com}. 
. * Keep relevant variables
. keep caseno file_date fdate fyear chapter office county role? address? zip? name?
{txt}
{com}. 
. * Reshape
. reshape long role address zip name, i(caseno) j(partynum)
{txt}(note: j = 1 2 3 4 5)

Data{col 36}wide{col 43}->{col 48}long
{hline 77}
Number of obs.                 {res}  557826   {txt}->{res} 2.8e+06
{txt}Number of variables            {res}      27   {txt}->{res}      12
{txt}j variable (5 values)                     ->   {res}partynum
{txt}xij variables:
                  {res}role1 role2 ... role5   {txt}->   {res}role
         address1 address2 ... address5   {txt}->   {res}address
                     zip1 zip2 ... zip5   {txt}->   {res}zip
                  name1 name2 ... name5   {txt}->   {res}name
{txt}{hline 77}

{com}. 
. * Only keep debtors
. replace role = trim(role)
{txt}(0 real changes made)

{com}. keep if role == "Debtor"
{txt}(2,166,054 observations deleted)

{com}. 
. *--------------------------------------------------
. * Parse debtor's name
. *--------------------------------------------------
. 
. * Parse debtor name - start with first name
. replace name = upper(trim(itrim(name)))
{txt}(622,604 real changes made)

{com}. gen fname_debtor = upper(name)
{txt}
{com}. 
. * Clean name
. replace fname_debtor = subinstr(fname_debtor,".","",.)
{txt}(258,732 real changes made)

{com}. replace fname_debtor = regexr(fname_debtor,", ([A-Z]+$)","") // Parse out suffix (Jr, II, III, etc.)
{txt}(51,670 real changes made)

{com}. 
. * Parse last name
. gen lname_debtor = regexs(1) if regexm(fname_debtor," ([A-Z]+$)")
{txt}(13,316 missing values generated)

{com}. replace lname_debtor = trim(itrim(lname_debtor))
{txt}(0 real changes made)

{com}. 
. * Only keep first part of first name
. replace fname_debtor = word(fname_debtor,1) // Only take first part of first name
{txt}(622,490 real changes made)

{com}. drop name
{txt}
{com}. 
. *--------------------------------------------------
. * Parse address
. *--------------------------------------------------
. 
. * Format zip code
. rename zip zipcode
{res}{txt}
{com}. tostring zipcode, replace
{txt}zipcode was {res:long} now {res:str5}

{com}. replace zipcode = "" if zipcode == "."
{txt}(21,850 real changes made)

{com}. replace zipcode = "" if !regexm(zipcode,"^[0-9][0-9][0-9][0-9][0-9]$")
{txt}(432 real changes made)

{com}. 
. * Clean address
. replace address = upper(address)
{txt}(601,833 real changes made)

{com}. replace address = trim(itrim(address))
{txt}(75,450 real changes made)

{com}. replace address = regexr(address,"; \( +\)$","")
{txt}(72,452 real changes made)

{com}. replace address = subinstr(address,"&QUOT;","",.)
{txt}(81 real changes made)

{com}. 
. * Split full address by semi-colon
. split address, parse(";")
{res}variables created as string: 
{txt}{col 1}address1{col 11}address2{col 21}address3{col 31}address4{col 41}address5{col 51}address6

{com}. rename address fulladd
{res}{txt}
{com}. foreach var of varlist address? {c -(}
{txt}  2{com}.         replace `var' = trim(itrim(`var'))
{txt}  3{com}. {c )-}
{txt}(0 real changes made)
(601,932 real changes made)
(84,962 real changes made)
(4,130 real changes made)
(58 real changes made)
(1 real change made)

{com}. 
. * Apartment flag
. gen isapt = 0
{txt}
{com}. gen pobox = 0
{txt}
{com}. forvalues i = 1/4{c -(}
{txt}  2{com}.         replace address`i' = subinstr(address`i',",","",.)
{txt}  3{com}.         replace address`i' = subinstr(address`i',".","",.)
{txt}  4{com}.         replace address`i' = trim(itrim(address`i'))
{txt}  5{com}.         replace isapt = 1 if regexm(address`i',"^APT|^APARTMENT|^UNIT-|^UNIT |^SUITE |^#|^NO [0-9]|^NO\. [0-9]")
{txt}  6{com}.         replace pobox = 1 if regexm(address`i',"^BOX|^P\.O\.|^PO BOX")
{txt}  7{com}. {c )-}
{txt}(28,116 real changes made)
(116,648 real changes made)
(51 real changes made)
(322 real changes made)
(17,429 real changes made)
(547,077 real changes made)
(37,377 real changes made)
(4 real changes made)
(47,446 real changes made)
(1,506 real changes made)
(54,684 real changes made)
(824 real changes made)
(7 real changes made)
(144 real changes made)
(46 real changes made)
(416 real changes made)
(10 real changes made)
(1 real change made)
(8 real changes made)
(0 real changes made)

{com}. 
. * Parse apartment
. gen aptno = ""
{txt}(623,076 missing values generated)

{com}. foreach i in 3 1 2 {c -(}
{txt}  2{com}.         gen temp = " " + address`i' // leading space
{txt}  3{com}.         replace aptno = regexs(1) if regexm(temp," APT (.+$)")
{txt}  4{com}.         replace aptno = regexs(1) if regexm(temp," APT-(.+$)")
{txt}  5{com}.         replace aptno = regexs(1) if regexm(temp," APARTMENT (.+$)")
{txt}  6{com}.         replace aptno = regexs(1) if regexm(temp," UNIT (.+$)")
{txt}  7{com}.         replace aptno = regexs(1) if regexm(temp," UNIT-(.+$)")
{txt}  8{com}.         replace aptno = regexs(1) if regexm(temp," #(.+$)")
{txt}  9{com}.         replace aptno = regexs(1) if regexm(temp," SUITE (.+$)")
{txt} 10{com}.         replace aptno = regexs(1) if regexm(temp," NO ([0-9]+$)")
{txt} 11{com}.         replace aptno = regexs(1) if regexm(temp," ([A-Z]-[0-9]+$)") 
{txt} 12{com}.         replace aptno = regexs(1) if regexm(temp," ([A-Z][0-9]+$)")
{txt} 13{com}.         replace aptno = regexs(1) if regexm(temp," ([A-Z] [0-9]+$)")    
{txt} 14{com}.         drop temp
{txt} 15{com}. {c )-}
{txt}variable {bf}aptno{sf} was {bf}{res}str1{sf}{txt} now {bf}{res}str17{sf}
{txt}(60 real changes made)
(0 real changes made)
(4 real changes made)
(4 real changes made)
(0 real changes made)
(39 real changes made)
variable {bf}aptno{sf} was {bf}{res}str17{sf}{txt} now {bf}{res}str26{sf}
{txt}(64 real changes made)
(9 real changes made)
(2 real changes made)
(8 real changes made)
(1 real change made)
(15,411 real changes made)
(11 real changes made)
(267 real changes made)
(1,139 real changes made)
(0 real changes made)
variable {bf}aptno{sf} was {bf}{res}str26{sf}{txt} now {bf}{res}str33{sf}
{txt}(19,836 real changes made)
(269 real changes made)
(988 real changes made)
(545 real changes made)
(433 real changes made)
(21 real changes made)
(25,814 real changes made)
(12 real changes made)
(5,147 real changes made)
(2,219 real changes made)
(4 real changes made)
(12,883 real changes made)
(923 real changes made)
(3,161 real changes made)
(526 real changes made)
(335 real changes made)
(42 real changes made)

{com}. replace aptno = subinstr(aptno," ","",.)
{txt}(1,106 real changes made)

{com}. replace aptno = subinstr(aptno,"-","",.)
{txt}(5,437 real changes made)

{com}. replace aptno = subinstr(aptno,"#","",.)
{txt}(62 real changes made)

{com}. 
. * Reconcile
. gen address = address1
{txt}(21,092 missing values generated)

{com}. replace address = address2 if regexm(address,"^C/O")
{txt}(967 real changes made)

{com}. replace address = address3 if regexm(address,"^C/O")
{txt}(2 real changes made)

{com}. drop address?
{txt}
{com}. 
. * Clean address
. replace address = regexr(address," STAPT ", "ST APT")
{txt}(9 real changes made)

{com}. replace address = regexr(address," STE$", "ST")
{txt}(1 real change made)

{com}. 
. * Prepare address parse
. gen temp = address
{txt}(21,092 missing values generated)

{com}. replace temp = trim(itrim(temp))
{txt}(0 real changes made)

{com}. drop if regexm(temp,"PO BOX")
{txt}(17,042 observations deleted)

{com}. drop if regexm(temp,"P O BOX")
{txt}(600 observations deleted)

{com}. drop if regexm(temp,"^POB")
{txt}(1,047 observations deleted)

{com}. 
. * Trim apartment ending
. replace isapt = 1 if regexm(temp," APT .+$")
{txt}(15,320 real changes made)

{com}. replace temp = regexr(temp," APT .+$","")
{txt}(15,327 real changes made)

{com}. replace isapt = 1 if regexm(temp," APT-.+$")
{txt}(11 real changes made)

{com}. replace temp = regexr(temp," APT-.+$","")
{txt}(11 real changes made)

{com}. replace isapt = 1 if regexm(temp," APARTMENT .+$")
{txt}(236 real changes made)

{com}. replace temp = regexr(temp," APARTMENT .+$","")
{txt}(237 real changes made)

{com}. replace isapt = 1 if regexm(temp," UNIT .+$")
{txt}(1,128 real changes made)

{com}. replace temp = regexr(temp," UNIT .+$","")
{txt}(1,130 real changes made)

{com}. replace isapt = 1 if regexm(temp," UNIT-.+$")
{txt}(0 real changes made)

{com}. replace temp = regexr(temp," UNIT-.+$","")
{txt}(0 real changes made)

{com}. replace isapt = 1 if regexm(temp," #.+$")
{txt}(18,815 real changes made)

{com}. replace temp = regexr(temp," #.+$","")
{txt}(18,836 real changes made)

{com}. replace isapt = 1 if regexm(temp," SUITE .+$")
{txt}(279 real changes made)

{com}. replace temp = regexr(temp," SUITE .+$","")
{txt}(280 real changes made)

{com}. replace isapt = 1 if regexm(temp," NO [0-9]+$")
{txt}(955 real changes made)

{com}. replace temp = regexr(temp," NO [0-9]+$","")
{txt}(956 real changes made)

{com}. replace isapt = 1 if regexm(temp," [A-Z]-[0-9]+$")
{txt}(543 real changes made)

{com}. replace temp = regexr(temp," [A-Z]-[0-9]+$","")
{txt}(549 real changes made)

{com}. replace isapt = 1 if regexm(temp," [A-Z] [0-9]+$")
{txt}(21 real changes made)

{com}. replace temp = regexr(temp," [A-Z] [0-9]+$","")
{txt}(21 real changes made)

{com}. replace isapt = 1 if regexm(temp," [A-Z][0-9]+$")
{txt}(426 real changes made)

{com}. replace temp = regexr(temp," [A-Z][0-9]+$","")
{txt}(437 real changes made)

{com}. replace aptno = regexs(1) if regexm(temp," ([0-9]+$)") // parse apartment if end in numbers
{txt}(2,458 real changes made)

{com}. replace isapt = 1 if regexm(temp," [0-9]+$")
{txt}(2,403 real changes made)

{com}. replace temp = regexr(temp," [0-9]+$","")
{txt}(2,535 real changes made)

{com}. 
. * Parse floor
. replace temp = regexr(temp," FLOOR .*$","")
{txt}(8 real changes made)

{com}. replace temp = regexr(temp," FL .*$","")
{txt}(5 real changes made)

{com}. 
. 
. * Tidy whitespace and strip trailing hyphen
. replace temp = trim(itrim(temp))
{txt}(0 real changes made)

{com}. replace temp = regexr(temp,"-$","")
{txt}(172 real changes made)

{com}. replace temp = trim(itrim(temp))
{txt}(163 real changes made)

{com}. 
. * Abbreviate direction
. replace temp = regexr(temp," NORTH$"," N")
{txt}(423 real changes made)

{com}. replace temp = regexr(temp," SOUTH$"," S")
{txt}(982 real changes made)

{com}. replace temp = regexr(temp," WEST$"," W")
{txt}(544 real changes made)

{com}. replace temp = regexr(temp," EAST$"," E")
{txt}(406 real changes made)

{com}. replace temp = regexr(temp," NORTHEAST$"," NE")
{txt}(0 real changes made)

{com}. replace temp = regexr(temp," NORTHWEST$"," NW")
{txt}(0 real changes made)

{com}. replace temp = regexr(temp," SOUTHEAST$"," SE")
{txt}(0 real changes made)

{com}. replace temp = regexr(temp," SOUTHWEST$"," SW")
{txt}(0 real changes made)

{com}. 
. * Street direction
. gen stdir = regexs(1) if regexm(temp,"( SE$| SW$| NE$| NW$| S$| N$| E$| W$)")
{txt}(598,771 missing values generated)

{com}. replace stdir = trim(itrim(stdir))
{txt}(5,616 real changes made)

{com}. replace temp = regexr(temp,"( SE$| SW$| NE$| NW$| S$| N$| E$| W$)","")
{txt}(5,616 real changes made)

{com}. replace temp = trim(itrim(temp))
{txt}(0 real changes made)

{com}. 
. * Misspellings
. replace temp = regexr(temp,"TERACE","TERRACE")
{txt}(16 real changes made)

{com}. replace temp = regexr(temp,"DRIVEE","DRIVE")
{txt}(3 real changes made)

{com}. replace temp = regexr(temp," DIVE$","DRIVE")
{txt}(20 real changes made)

{com}. replace temp = regexr(temp," HGTS$","HTS")
{txt}(2 real changes made)

{com}. 
. * Street type
. replace temp = regexr(temp," ROAD$", " RD")
{txt}(116,950 real changes made)

{com}. replace temp = regexr(temp," DRIVE$", " DR")
{txt}(75,235 real changes made)

{com}. replace temp = regexr(temp," COURT$", " CT")
{txt}(57,233 real changes made)

{com}. replace temp = regexr(temp," AVENUE$", " AVE")
{txt}(69,612 real changes made)

{com}. replace temp = regexr(temp," AV$", " AVE")
{txt}(99 real changes made)

{com}. replace temp = regexr(temp," AVE`$", " AVE")
{txt}(0 real changes made)

{com}. replace temp = regexr(temp," AAVE$", " AVE")
{txt}(3 real changes made)

{com}. replace temp = regexr(temp," AE$", " AVE")
{txt}(1 real change made)

{com}. replace temp = regexr(temp," STREET$", " ST")
{txt}(49,911 real changes made)

{com}. replace temp = regexr(temp," STEET$", " ST") // misspelling
{txt}(21 real changes made)

{com}. replace temp = regexr(temp," LANE$", " LN")
{txt}(34,739 real changes made)

{com}. replace temp = regexr(temp," PLACE$", " PL")
{txt}(17,457 real changes made)

{com}. replace temp = regexr(temp," CIRCLE$", " CIR")
{txt}(15,375 real changes made)

{com}. replace temp = regexr(temp," CR$", " CIR")
{txt}(113 real changes made)

{com}. replace temp = regexr(temp," TERRACE$", " TER")
{txt}(8,784 real changes made)

{com}. replace temp = regexr(temp," TR$", " TER")
{txt}(69 real changes made)

{com}. replace temp = regexr(temp," TERR$", " TER")
{txt}(521 real changes made)

{com}. replace temp = regexr(temp," BOULEVARD$", " BLVD")
{txt}(1,084 real changes made)

{com}. replace temp = regexr(temp," BV$", " BLVD")
{txt}(2 real changes made)

{com}. replace temp = regexr(temp," HIGHWAY$", " HWY")
{txt}(1,549 real changes made)

{com}. replace temp = regexr(temp," HW$", " HWY")
{txt}(0 real changes made)

{com}. replace temp = regexr(temp," WY$", " WAY")
{txt}(25 real changes made)

{com}. replace temp = regexr(temp," PARKWAY$", " PKWY")
{txt}(3,203 real changes made)

{com}. replace temp = regexr(temp," PW$", " PKWY")
{txt}(0 real changes made)

{com}. replace temp = regexr(temp," TRAIL$", " TRL")
{txt}(1,161 real changes made)

{com}. gen sttype = regexs(1) if regexm(temp,"( RD$| DR$| CT$| AVE$| ST$| LN$| WAY$| PL$| CIR$| TER$| BLVD$| HWY$| PKWY$| PIKE$| TRL$)")
{txt}(40,986 missing values generated)

{com}. replace sttype = trim(itrim(sttype))
{txt}(563,401 real changes made)

{com}. replace temp = regexr(temp,"( RD$| DR$| CT$| AVE$| ST$| LN$| WAY$| PL$| CIR$| TER$| BLVD$| HWY$| PKWY$| PIKE$| TRL$)","")
{txt}(563,401 real changes made)

{com}. replace temp = trim(itrim(temp))
{txt}(0 real changes made)

{com}. 
. * Street number
. gen stno = regexs(1) if regexm(temp,"^([0-9]+[-]?[A-Z]?) ")
{txt}(26,790 missing values generated)

{com}. replace stno = trim(itrim(stno))
{txt}(0 real changes made)

{com}. replace temp = regexr(temp,"^([0-9]+[-]?[A-Z]?) ","")
{txt}(577,597 real changes made)

{com}. 
. * Street name
. rename temp stname
{res}{txt}
{com}. replace stname = regexr(stname," SQUARE$"," SQ")
{txt}(1,722 real changes made)

{com}. replace stname = regexr(stname," CRESCENT$"," CRES")
{txt}(37 real changes made)

{com}. replace stname = regexr(stname," ALLEY$"," ALY")
{txt}(76 real changes made)

{com}. replace stname = regexr(stname," VIEW$"," VW")
{txt}(1,821 real changes made)

{com}. replace stname = regexr(stname," HEIGHTS$"," HTS")
{txt}(1,773 real changes made)

{com}. replace stname = regexr(stname," RIDGE$"," RDG")
{txt}(4,087 real changes made)

{com}. replace stname = regexr(stname," KNOLL$"," KNL")
{txt}(436 real changes made)

{com}. replace stname = regexr(stname," GATEWAY$"," GTWY")
{txt}(171 real changes made)

{com}. replace stname = regexr(stname," COVE$"," CV")
{txt}(556 real changes made)

{com}. replace stname = regexr(stname," PLAZA$"," PLZ")
{txt}(80 real changes made)

{com}. 
. * Update address
. replace address = stno + " " + stname + " " + sttype + " " + stdir
{txt}variable {bf}address{sf} was {bf}{res}str40{sf}{txt} now {bf}{res}str43{sf}
{txt}(603,633 real changes made)

{com}. replace address = trim(itrim(address))
{txt}(599,467 real changes made)

{com}. 
. * If address is apartment in one case, make it the same in all cases
. egen temp = max(isapt), by(address zipcode)
{txt}
{com}. replace isapt = temp
{txt}(6,990 real changes made)

{com}. drop temp
{txt}
{com}. 
. *--------------------------------------------------
. * Save data
. *--------------------------------------------------
. 
. * Save
. isid caseno partynum
{txt}
{com}. compress
  {txt}variable {bf}isapt{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}pobox{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}fulladd{sf} was {bf}{res}str172{sf}{txt} now {bf}{res}str132{sf}
  {txt}variable {bf}role{sf} was {bf}{res}str27{sf}{txt} now {bf}{res}str6{sf}
  {txt}variable {bf}fname_debtor{sf} was {bf}{res}str92{sf}{txt} now {bf}{res}str23{sf}
  {txt}variable {bf}aptno{sf} was {bf}{res}str33{sf}{txt} now {bf}{res}str29{sf}
  {txt}variable {bf}address{sf} was {bf}{res}str43{sf}{txt} now {bf}{res}str40{sf}
  {txt}variable {bf}stdir{sf} was {bf}{res}str3{sf}{txt} now {bf}{res}str2{sf}
  {txt}variable {bf}sttype{sf} was {bf}{res}str5{sf}{txt} now {bf}{res}str4{sf}
{txt}  (87,636,115 bytes saved)

{com}. save data\gnw_bankruptcy_addresses.dta, replace
{txt}file data\gnw_bankruptcy_addresses.dta saved

{com}. 
. ********************************* Link bankruptcy addresses to criminal cases ************************************
. 
. *--------------------------------------------------
. * Join data 
. *--------------------------------------------------
. 
. * Load data
. cd "$workpath"
{res}H:\Dropbox\work\replicationpack
{txt}
{com}. use data\crimcases_address.dta, clear
{txt}
{com}. drop if missing(address)
{txt}(138 observations deleted)

{com}. 
. * Rename variables
. foreach var of varlist zipcode city state aptno isapt {c -(}
{txt}  2{com}.         rename `var' `var'_case
{txt}  3{com}. {c )-}
{res}{txt}
{com}. 
. * Join with bankruptcy addresses
. joinby address using data\gnw_bankruptcy_addresses.dta, update unm(m)
{txt}
{com}. keep if _m == 3
{txt}(774,510 observations deleted)

{com}. drop _merge
{txt}
{com}. 
. *--------------------------------------------------
. * Basic name matching
. *--------------------------------------------------
. 
. * Name match dummy
. gen namematch_full = lastname_def == lname_debtor & !missing(lastname_def) & firstname_def == fname_debtor & !missing(firstname_def)
{txt}
{com}. gen namematch_last = lastname_def == lname_debtor & !missing(lastname_def)
{txt}
{com}. 
. * Defendant name match
. gen defnamematch_full = namematch_full
{txt}
{com}. gen defnamematch_last = namematch_last
{txt}
{com}. 
. * Fullnames
. gen fullname_def = firstname_def + " " + lastname_def
{txt}
{com}. gen fullname_debtor = fname_debtor + " " + lname_debtor
{txt}
{com}. replace fullname_def = trim(itrim(fullname_def))
{txt}(0 real changes made)

{com}. replace fullname_debtor = trim(itrim(fullname_debtor))
{txt}(23,801 real changes made)

{com}. 
. *--------------------------------------------------
. * Family name matching 
. *--------------------------------------------------
. 
. * Debtor ID
. egen debtorid=seq(), by(casenumber caseno)
{txt}
{com}. isid casenumber caseno debtorid
{txt}
{com}. 
. * Merge with family names
. joinby casenumber caseno using data\raw\bankruptcy\relnames_bankruptcy.dta, update unm(m)
{txt}
{com}. drop _merge
{txt}
{com}. rename relative fname_rel
{res}{txt}
{com}. replace fname_rel = trim(itrim(upper(fname_rel)))
{txt}(2,735,347 real changes made)

{com}. drop n_relative source
{txt}
{com}. 
. * Get rid of special characters
. replace fname_rel = subinstr(fname_rel,",","",.)
{txt}(0 real changes made)

{com}. replace fname_rel = subinstr(fname_rel,"/","",.)
{txt}(0 real changes made)

{com}. replace fname_rel = subinstr(fname_rel,".","",.)
{txt}(0 real changes made)

{com}. replace fname_rel = subinstr(fname_rel,`"""',"",.)
{txt}(11 real changes made)

{com}. 
. * Parse family name
. gen firstname_rel = word(fname_rel,1)
{txt}(881,227 missing values generated)

{com}. gen lastname_rel = word(fname_rel,2)
{txt}(881,227 missing values generated)

{com}. 
. * Family name match
. gen fammatch_full = lastname_def == lastname_rel & !missing(lastname_def) & firstname_def == firstname_rel & !missing(firstname_def)
{txt}
{com}. gen fammatch_last = lastname_def == lastname_rel & !missing(lastname_def)
{txt}
{com}. 
. *--------------------------------------------------
. * Collapse by debtor-defendant
. *--------------------------------------------------
. 
. * Collapse
. drop *name_rel
{txt}
{com}. foreach var of varlist fammatch_full fammatch_last {c -(}
{txt}  2{com}.         egen temp = max(`var'), by(casenumber caseno debtorid)
{txt}  3{com}.         replace `var'=temp
{txt}  4{com}.         drop temp
{txt}  5{com}. {c )-}
{txt}(965,974 real changes made)
(907,693 real changes made)

{com}. duplicates drop

{p 0 4}{txt}Duplicates in terms of {txt} all variables{p_end}

(2,690,486 observations deleted)

{com}. isid casenumber caseno debtorid
{txt}
{com}. 
. * Include name match
. replace namematch_full = 1 if fammatch_full==1
{txt}(12,266 real changes made)

{com}. replace namematch_last = 1 if fammatch_last==1
{txt}(6,660 real changes made)

{com}. 
. *--------------------------------------------------
. * Get defendant family members
. *--------------------------------------------------
. 
. * Merge with family names
. joinby casenumber using data\raw\mdcourts\relnames_defendants.dta, update unm(m)
{txt}
{com}. drop _merge
{txt}
{com}. rename relative fname_rel
{res}{txt}
{com}. replace fname_rel = trim(itrim(upper(fname_rel)))
{txt}(6,872,516 real changes made)

{com}. drop n_relative source
{txt}
{com}. cap drop fullname
{txt}
{com}. 
. * Get rid of special characters
. replace fname_rel = subinstr(fname_rel,",","",.)
{txt}(0 real changes made)

{com}. replace fname_rel = subinstr(fname_rel,"/","",.)
{txt}(0 real changes made)

{com}. replace fname_rel = subinstr(fname_rel,".","",.)
{txt}(0 real changes made)

{com}. replace fname_rel = subinstr(fname_rel,`"""',"",.)
{txt}(7 real changes made)

{com}. 
. * Parse family name
. gen firstname_rel = word(fname_rel,1)
{txt}(776,011 missing values generated)

{com}. gen lastname_rel = word(fname_rel,2)
{txt}(776,011 missing values generated)

{com}. 
. * Family name match
. replace fammatch_full = 1 if lname_debtor == lastname_rel & !missing(lname_debtor) & fname_debtor == firstname_rel & !missing(fname_debtor)
{txt}(14,598 real changes made)

{com}. replace fammatch_last = 1 if lname_debtor == lastname_rel & !missing(lname_debtor)
{txt}(392,948 real changes made)

{com}. 
. *--------------------------------------------------
. * Collapse by debtor-defendant
. *--------------------------------------------------
. 
. * Collapse
. drop *name_rel
{txt}
{com}. foreach var of varlist fammatch_full fammatch_last {c -(}
{txt}  2{com}.         egen temp = max(`var'), by(casenumber caseno debtorid)
{txt}  3{com}.         replace `var'=temp
{txt}  4{com}.         drop temp
{txt}  5{com}. {c )-}
{txt}(681,883 real changes made)
(781,364 real changes made)

{com}. duplicates drop

{p 0 4}{txt}Duplicates in terms of {txt} all variables{p_end}

(6,722,439 observations deleted)

{com}. isid casenumber caseno debtorid
{txt}
{com}. 
. * Include name match
. replace namematch_full = 1 if fammatch_full==1
{txt}(12,586 real changes made)

{com}. replace namematch_last = 1 if fammatch_last==1
{txt}(7,697 real changes made)

{com}. 
. *--------------------------------------------------
. * Filter based on zip and apartment matches
. *--------------------------------------------------
. 
. * Only keep if same zip code
. destring zipcode*, replace
{txt}zipcode_case: all characters numeric; {res}replaced {txt}as {res}long
{txt}(47 missing values generated)
{res}{txt}zipcode: all characters numeric; {res}replaced {txt}as {res}long
{txt}(1665 missing values generated)
{res}{txt}
{com}. gen samezip = zipcode == zipcode_case
{txt}
{com}. drop if !samezip
{txt}(135,037 observations deleted)

{com}. 
. * Flag apartment only if apartments don't match and one is missing
. gen sameapt = aptno == aptno_case
{txt}
{com}. gen exactapt = sameapt & !missing(aptno)
{txt}
{com}. gen isapt_gnw = (isapt_case | isapt)
{txt}
{com}. drop if !sameapt
{txt}(524,429 observations deleted)

{com}. 
. *--------------------------------------------------
. * Date gap between hearing and transaction
. *--------------------------------------------------
. 
. * Get number of days between INIT and bankruptcy
. gen dates_init_bankrupt = fdate - DateINIT1
{txt}
{com}. 
. * Get information on first bankruptcy after case
. egen postbankruptdate = min(dates_init_bankrupt) if dates_init_bankrupt>0, by(casenumber)
{txt}(216,851 missing values generated)

{com}. 
. * Get information on last bankruptcy before case
. egen prebankruptdate = max(dates_init_bankrupt) if dates_init_bankrupt<0, by(casenumber)
{txt}(49,807 missing values generated)

{com}. 
. *--------------------------------------------------
. * Generate transaction dummies
. *--------------------------------------------------
. 
. * Bankruptcy dummies - post-hearing indicators at different horizons (in days)
. foreach days in 91 182 365 730 1105 {c -(}   
{txt}  2{com}.         gen bankrupt_`days' = inrange(dates_init_bankrupt,1,`days')     
{txt}  3{com}.         gen bankrupt_`days'pre = inrange(dates_init_bankrupt,-`days',-1)        
{txt}  4{com}.         gen bankrupt_nml_`days' = inrange(dates_init_bankrupt,1,`days') & namematch_last == 1
{txt}  5{com}.         gen bankrupt_nml_`days'pre = inrange(dates_init_bankrupt,-`days',-1) & namematch_last == 1
{txt}  6{com}.         gen bankrupt_nmf_`days' = inrange(dates_init_bankrupt,1,`days') & namematch_full == 1
{txt}  7{com}.         gen bankrupt_nmf_`days'pre = inrange(dates_init_bankrupt,-`days',-1) & namematch_full == 1
{txt}  8{com}.         gen bankrupt_fml_`days' = inrange(dates_init_bankrupt,1,`days') & fammatch_last == 1
{txt}  9{com}.         gen bankrupt_fml_`days'pre = inrange(dates_init_bankrupt,-`days',-1) & fammatch_last == 1
{txt} 10{com}.         gen bankrupt_fmf_`days' = inrange(dates_init_bankrupt,1,`days') & fammatch_full == 1
{txt} 11{com}.         gen bankrupt_fmf_`days'pre = inrange(dates_init_bankrupt,-`days',-1) & fammatch_full == 1
{txt} 12{com}.         gen bankrupt_dml_`days' = inrange(dates_init_bankrupt,1,`days') & defnamematch_last == 1
{txt} 13{com}.         gen bankrupt_dml_`days'pre = inrange(dates_init_bankrupt,-`days',-1) & defnamematch_last == 1
{txt} 14{com}.         gen bankrupt_dmf_`days' = inrange(dates_init_bankrupt,1,`days') & defnamematch_full == 1
{txt} 15{com}.         gen bankrupt_dmf_`days'pre = inrange(dates_init_bankrupt,-`days',-1) & defnamematch_full == 1
{txt} 16{com}. {c )-}
{txt}
{com}. 
. * Bankruptcy dummies - chapter 7 and 13
. foreach type in 7 13 {c -(}
{txt}  2{com}.         foreach days in 91 182 365 730 1105 {c -(}   
{txt}  3{com}.                 gen bankrupt`type'_`days' = inrange(dates_init_bankrupt,1,`days') & chapter == `type'
{txt}  4{com}.                 gen bankrupt`type'_`days'pre = inrange(dates_init_bankrupt,-`days',-1) & chapter == `type'
{txt}  5{com}.                 gen bankrupt`type'_nml_`days' = inrange(dates_init_bankrupt,1,`days') & chapter == `type' & namematch_last == 1
{txt}  6{com}.                 gen bankrupt`type'_nml_`days'pre = inrange(dates_init_bankrupt,-`days',-1) & chapter == `type' & namematch_last == 1
{txt}  7{com}.                 gen bankrupt`type'_nmf_`days' = inrange(dates_init_bankrupt,1,`days') & chapter == `type' & namematch_full == 1
{txt}  8{com}.                 gen bankrupt`type'_nmf_`days'pre = inrange(dates_init_bankrupt,-`days',-1) & chapter == `type' & namematch_full == 1
{txt}  9{com}.                 gen bankrupt`type'_fml_`days' = inrange(dates_init_bankrupt,1,`days') & chapter == `type' & fammatch_last == 1
{txt} 10{com}.                 gen bankrupt`type'_fml_`days'pre = inrange(dates_init_bankrupt,-`days',-1) & chapter == `type' & fammatch_last == 1
{txt} 11{com}.                 gen bankrupt`type'_fmf_`days' = inrange(dates_init_bankrupt,1,`days') & chapter == `type' & fammatch_full == 1
{txt} 12{com}.                 gen bankrupt`type'_fmf_`days'pre = inrange(dates_init_bankrupt,-`days',-1) & chapter == `type' & fammatch_full == 1
{txt} 13{com}.                 gen bankrupt`type'_dml_`days' = inrange(dates_init_bankrupt,1,`days') & chapter == `type' & defnamematch_last == 1
{txt} 14{com}.                 gen bankrupt`type'_dml_`days'pre = inrange(dates_init_bankrupt,-`days',-1) & chapter == `type' & defnamematch_last == 1
{txt} 15{com}.                 gen bankrupt`type'_dmf_`days' = inrange(dates_init_bankrupt,1,`days') & chapter == `type' & defnamematch_full == 1
{txt} 16{com}.                 gen bankrupt`type'_dmf_`days'pre = inrange(dates_init_bankrupt,-`days',-1) & chapter == `type' & defnamematch_full == 1
{txt} 17{com}.                 
.         {c )-}
{txt} 18{com}. {c )-}
{txt}
{com}. 
. * Rename variables based on months instead of days
. rename *91* *3mos*
{res}{txt}
{com}. rename *182* *6mos*
{res}{txt}
{com}. rename *365* *1yr*
{res}{txt}
{com}. rename *730* *2yr*
{res}{txt}
{com}. rename *1105* *3yr*
{res}{txt}
{com}. 
. * Drop if too many matches and apartment/name does not match exactly
. egen nummatches = count(DateINIT1), by(casenumber address)
{txt}
{com}. drop if nummatches > 10 & !exactapt & !namematch_full & !namematch_last
{txt}(1,662 observations deleted)

{com}. 
. *--------------------------------------------------
. * Collapse and save
. *--------------------------------------------------
. 
. * Collapse
. gsort casenumber postbankruptdate
{txt}
{com}. collapse (max) bankrupt*_* isapt_gnw p*bankruptdate, by(casenumber)
{txt}
{com}. 
. * Save
. compress
  {txt}variable {bf}bankrupt_3mos{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt_3mospre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt_nml_3mos{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt_nml_3mospre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt_nmf_3mos{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt_nmf_3mospre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt_fml_3mos{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt_fml_3mospre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt_fmf_3mos{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt_fmf_3mospre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt_dml_3mos{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt_dml_3mospre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt_dmf_3mos{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt_dmf_3mospre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt_6mos{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt_6mospre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt_nml_6mos{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt_nml_6mospre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt_nmf_6mos{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt_nmf_6mospre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt_fml_6mos{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt_fml_6mospre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt_fmf_6mos{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt_fmf_6mospre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt_dml_6mos{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt_dml_6mospre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt_dmf_6mos{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt_dmf_6mospre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt_1yr{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt_1yrpre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt_nml_1yr{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt_nml_1yrpre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt_nmf_1yr{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt_nmf_1yrpre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt_fml_1yr{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt_fml_1yrpre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt_fmf_1yr{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt_fmf_1yrpre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt_dml_1yr{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt_dml_1yrpre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt_dmf_1yr{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt_dmf_1yrpre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt_2yr{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt_2yrpre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt_nml_2yr{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt_nml_2yrpre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt_nmf_2yr{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt_nmf_2yrpre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt_fml_2yr{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt_fml_2yrpre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt_fmf_2yr{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt_fmf_2yrpre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt_dml_2yr{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt_dml_2yrpre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt_dmf_2yr{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt_dmf_2yrpre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt_3yr{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt_3yrpre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt_nml_3yr{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt_nml_3yrpre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt_nmf_3yr{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt_nmf_3yrpre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt_fml_3yr{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt_fml_3yrpre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt_fmf_3yr{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt_fmf_3yrpre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt_dml_3yr{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt_dml_3yrpre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt_dmf_3yr{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt_dmf_3yrpre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt7_3mos{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt7_3mospre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt7_nml_3mos{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt7_nml_3mospre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt7_nmf_3mos{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt7_nmf_3mospre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt7_fml_3mos{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt7_fml_3mospre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt7_fmf_3mos{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt7_fmf_3mospre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt7_dml_3mos{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt7_dml_3mospre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt7_dmf_3mos{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt7_dmf_3mospre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt7_6mos{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt7_6mospre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt7_nml_6mos{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt7_nml_6mospre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt7_nmf_6mos{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt7_nmf_6mospre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt7_fml_6mos{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt7_fml_6mospre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt7_fmf_6mos{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt7_fmf_6mospre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt7_dml_6mos{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt7_dml_6mospre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt7_dmf_6mos{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt7_dmf_6mospre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt7_1yr{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt7_1yrpre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt7_nml_1yr{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt7_nml_1yrpre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt7_nmf_1yr{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt7_nmf_1yrpre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt7_fml_1yr{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt7_fml_1yrpre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt7_fmf_1yr{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt7_fmf_1yrpre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt7_dml_1yr{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt7_dml_1yrpre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt7_dmf_1yr{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt7_dmf_1yrpre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt7_2yr{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt7_2yrpre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt7_nml_2yr{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt7_nml_2yrpre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt7_nmf_2yr{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt7_nmf_2yrpre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt7_fml_2yr{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt7_fml_2yrpre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt7_fmf_2yr{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt7_fmf_2yrpre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt7_dml_2yr{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt7_dml_2yrpre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt7_dmf_2yr{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt7_dmf_2yrpre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt7_3yr{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt7_3yrpre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt7_nml_3yr{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt7_nml_3yrpre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt7_nmf_3yr{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt7_nmf_3yrpre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt7_fml_3yr{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt7_fml_3yrpre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt7_fmf_3yr{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt7_fmf_3yrpre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt7_dml_3yr{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt7_dml_3yrpre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt7_dmf_3yr{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt7_dmf_3yrpre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt13_3mos{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt13_3mospre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt13_nml_3mos{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt13_nml_3mospre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt13_nmf_3mos{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt13_nmf_3mospre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt13_fml_3mos{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt13_fml_3mospre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt13_fmf_3mos{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt13_fmf_3mospre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt13_dml_3mos{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt13_dml_3mospre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt13_dmf_3mos{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt13_dmf_3mospre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt13_6mos{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt13_6mospre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt13_nml_6mos{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt13_nml_6mospre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt13_nmf_6mos{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt13_nmf_6mospre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt13_fml_6mos{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt13_fml_6mospre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt13_fmf_6mos{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt13_fmf_6mospre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt13_dml_6mos{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt13_dml_6mospre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt13_dmf_6mos{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt13_dmf_6mospre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt13_1yr{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt13_1yrpre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt13_nml_1yr{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt13_nml_1yrpre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt13_nmf_1yr{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt13_nmf_1yrpre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt13_fml_1yr{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt13_fml_1yrpre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt13_fmf_1yr{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt13_fmf_1yrpre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt13_dml_1yr{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt13_dml_1yrpre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt13_dmf_1yr{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt13_dmf_1yrpre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt13_2yr{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt13_2yrpre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt13_nml_2yr{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt13_nml_2yrpre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt13_nmf_2yr{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt13_nmf_2yrpre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt13_fml_2yr{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt13_fml_2yrpre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt13_fmf_2yr{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt13_fmf_2yrpre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt13_dml_2yr{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt13_dml_2yrpre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt13_dmf_2yr{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt13_dmf_2yrpre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt13_3yr{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt13_3yrpre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt13_nml_3yr{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt13_nml_3yrpre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt13_nmf_3yr{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt13_nmf_3yrpre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt13_fml_3yr{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt13_fml_3yrpre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt13_fmf_3yr{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt13_fmf_3yrpre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt13_dml_3yr{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt13_dml_3yrpre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt13_dmf_3yr{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bankrupt13_dmf_3yrpre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}isapt_gnw{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}postbankruptdate{sf} was {bf}{res}float{sf}{txt} now {bf}{res}int{sf}
  {txt}variable {bf}prebankruptdate{sf} was {bf}{res}float{sf}{txt} now {bf}{res}int{sf}
{txt}  (109,516,862 bytes saved)

{com}. save data\cases_gnwlinked.dta, replace
{txt}file data\cases_gnwlinked.dta saved

{com}. 
. *************************** Clean and parse single-property addresses **********************************
. 
. *--------------------------------------------------
. * Load data
. *--------------------------------------------------
. 
. cd "$workpath"
{res}H:\Dropbox\work\replicationpack
{txt}
{com}. use data\raw\zillow\addresses_zillow.dta, clear
{txt}
{com}. 
. * Drop multi-property transactions
. duplicates tag v1, gen(dflag)

{p 0 4}{txt}Duplicates in terms of {res} v1{p_end}
{txt}
{com}. drop if dflag >= 1
{txt}(2,225,247 observations deleted)

{com}. drop dflag
{txt}
{com}. 
. * Lower case variable names
. rename *, lower
{res}{txt}
{com}. 
. *--------------------------------------------------
. * Parse addresses
. *--------------------------------------------------
. 
. * Format zip and fips
. rename zip zipcode
{res}{txt}
{com}. tostring zipcode, replace
{txt}zipcode was {res:long} now {res:str5}

{com}. replace zipcode = "" if zipcode == "."
{txt}(38,138 real changes made)

{com}. 
. * Street direction
. gen temp = trim(itrim(address))
{txt}
{com}. gen stdir = regexs(1) if regexm(temp,"( SE$| SW$| NE$| NW$| S$| N$| E$| W$)")
{txt}(8,728,101 missing values generated)

{com}. replace stdir = trim(itrim(stdir))
{txt}(91,693 real changes made)

{com}. replace temp = regexr(temp,"( SE$| SW$| NE$| NW$| S$| N$| E$| W$)","")
{txt}(91,693 real changes made)

{com}. 
. * Street type
. replace temp = regexr(temp," ROAD$", " RD")
{txt}(839 real changes made)

{com}. replace temp = regexr(temp," DRIVE$", " DR")
{txt}(175 real changes made)

{com}. replace temp = regexr(temp," COURT$", " CT")
{txt}(111 real changes made)

{com}. replace temp = regexr(temp," AVENUE$", " AVE")
{txt}(100 real changes made)

{com}. replace temp = regexr(temp," AV$", " AVE")
{txt}(2 real changes made)

{com}. replace temp = regexr(temp," AVE`$", " AVE")
{txt}(0 real changes made)

{com}. replace temp = regexr(temp," AAVE$", " AVE")
{txt}(0 real changes made)

{com}. replace temp = regexr(temp," AE$", " AVE")
{txt}(1 real change made)

{com}. replace temp = regexr(temp," STREET$", " ST")
{txt}(88 real changes made)

{com}. replace temp = regexr(temp," LANE$", " LN")
{txt}(129 real changes made)

{com}. replace temp = regexr(temp," PLACE$", " PL")
{txt}(34 real changes made)

{com}. replace temp = regexr(temp," CIRCLE$", " CIR")
{txt}(18 real changes made)

{com}. replace temp = regexr(temp," CR$", " CIR")
{txt}(6 real changes made)

{com}. replace temp = regexr(temp," TERRACE$", " TER")
{txt}(23 real changes made)

{com}. replace temp = regexr(temp," TR$", " TER")
{txt}(9 real changes made)

{com}. replace temp = regexr(temp," TERR$", " TER")
{txt}(17 real changes made)

{com}. replace temp = regexr(temp," BOULEVARD$", " BLVD")
{txt}(3 real changes made)

{com}. replace temp = regexr(temp," BV$", " BLVD")
{txt}(1 real change made)

{com}. replace temp = regexr(temp," HIGHWAY$", " HWY")
{txt}(34 real changes made)

{com}. replace temp = regexr(temp," HW$", " HWY")
{txt}(1 real change made)

{com}. replace temp = regexr(temp," WY$", " WAY")
{txt}(5 real changes made)

{com}. replace temp = regexr(temp," PARKWAY$", " PKWY")
{txt}(433 real changes made)

{com}. replace temp = regexr(temp," PW$", " PKWY")
{txt}(4 real changes made)

{com}. replace temp = regexr(temp," TRAIL$", " TRL")
{txt}(30 real changes made)

{com}. gen sttype = regexs(1) if regexm(temp,"( RD$| DR$| CT$| AVE$| ST$| LN$| WAY$| PL$| CIR$| TER$| BLVD$| HWY$| PKWY$| PIKE$| TRL$)")
{txt}(176,260 missing values generated)

{com}. replace sttype = trim(itrim(sttype))
{txt}(8,643,534 real changes made)

{com}. replace temp = regexr(temp,"( RD$| DR$| CT$| AVE$| ST$| LN$| WAY$| PL$| CIR$| TER$| BLVD$| HWY$| PKWY$| PIKE$| TRL$)","")
{txt}(8,643,534 real changes made)

{com}. 
. * Street number
. gen stno = regexs(1) if regexm(temp,"^([0-9]+[-]?[A-Z]?) ")
{txt}(118,103 missing values generated)

{com}. replace stno = trim(itrim(stno))
{txt}(0 real changes made)

{com}. replace temp = regexr(temp,"^([0-9]+[-]?[A-Z]?) ","")
{txt}(8,701,691 real changes made)

{com}. 
. * Street name
. rename temp stname
{res}{txt}
{com}. replace stname = regexr(stname," SQUARE$"," SQ")
{txt}(8,978 real changes made)

{com}. replace stname = regexr(stname," CRESCENT$"," CRES")
{txt}(250 real changes made)

{com}. replace stname = regexr(stname," ALLEY$"," ALY")
{txt}(152 real changes made)

{com}. replace stname = regexr(stname," VIEW$"," VW")
{txt}(44,615 real changes made)

{com}. replace stname = regexr(stname," HEIGHTS$"," HTS")
{txt}(20,525 real changes made)

{com}. replace stname = regexr(stname," RIDGE$"," RDG")
{txt}(76,671 real changes made)

{com}. replace stname = regexr(stname," KNOLL$"," KNL")
{txt}(7,908 real changes made)

{com}. replace stname = regexr(stname," GATEWAY$"," GTWY")
{txt}(329 real changes made)

{com}. replace stname = regexr(stname," COVE$"," CV")
{txt}(12,856 real changes made)

{com}. replace stname = regexr(stname," PLAZA$"," PLZ")
{txt}(21 real changes made)

{com}. 
. * Update address
. replace address = stno + " " + stname + " " + sttype + " " + stdir
{txt}(8,737,993 real changes made)

{com}. replace address = trim(itrim(address))
{txt}(8,736,026 real changes made)

{com}. 
. *--------------------------------------------------
. * Save data
. *--------------------------------------------------
. 
. * Save list of single-property transaction addresses
. compress
  {txt}variable {bf}address{sf} was {bf}{res}str60{sf}{txt} now {bf}{res}str55{sf}
  {txt}variable {bf}stname{sf} was {bf}{res}str58{sf}{txt} now {bf}{res}str52{sf}
  {txt}variable {bf}stdir{sf} was {bf}{res}str3{sf}{txt} now {bf}{res}str2{sf}
  {txt}variable {bf}sttype{sf} was {bf}{res}str5{sf}{txt} now {bf}{res}str4{sf}
{txt}  (114,657,322 bytes saved)

{com}. save data\raw\zillow\addresses_zillowcleaned.dta, replace
{txt}file data\raw\zillow\addresses_zillowcleaned.dta saved

{com}. 
. ***************************** Merge and clean ZTRAX data  **********************************
. 
. *--------------------------------------------------
. * Load data and clean
. *--------------------------------------------------
. 
. * Get Zillow raw transactions
. cd "$workpath"
{res}H:\Dropbox\work\replicationpack
{txt}
{com}. use data\raw\zillow\transactions_zillow.dta, replace
{txt}
{com}. 
. * Rename variables
. rename v1 transid
{res}{txt}
{com}. rename v2 fips
{res}{txt}
{com}. rename v3 st_abbr
{res}{txt}
{com}. rename v4 county
{res}{txt}
{com}. rename v7 date_record
{res}{txt}
{com}. rename v25 salepriceamt
{res}{txt}
{com}. rename v38 lender
{res}{txt}
{com}. rename v39 lendertype
{res}{txt}
{com}. rename v60 loanamt
{res}{txt}
{com}. rename v68 date_loandue
{res}{txt}
{com}. rename v71 intrate
{res}{txt}
{com}. 
. * Drop irrelevant variables
. drop v*
{txt}
{com}. rename *, lower
{res}{txt}  (all {it:newnames}=={it:oldnames})

{com}. rename transid v1 // temporarily for matching
{res}{txt}
{com}. 
. *--------------------------------------------------
. * Merge with addresses, foreclosures, and buyer names
. *--------------------------------------------------
. 
. * Add address
. merge 1:1 v1 using data\raw\zillow\addresses_zillowcleaned.dta, keep(1 3)
{res}
{txt}{col 5}Result{col 38}# of obs.
{col 5}{hline 41}
{col 5}not matched{col 30}{res}         193,823
{txt}{col 9}from master{col 30}{res}         193,823{txt}  (_merge==1)
{col 9}from using{col 30}{res}               0{txt}  (_merge==2)

{col 5}matched{col 30}{res}       8,819,794{txt}  (_merge==3)
{col 5}{hline 41}

{com}. drop if _m == 1
{txt}(193,823 observations deleted)

{com}. drop _m
{txt}
{com}. 
. * Add foreclosures
. merge 1:1 v1 using data\raw\zillow\foreclosures_zillow.dta, keep(1 3)
{res}
{txt}{col 5}Result{col 38}# of obs.
{col 5}{hline 41}
{col 5}not matched{col 30}{res}       8,473,159
{txt}{col 9}from master{col 30}{res}       8,473,159{txt}  (_merge==1)
{col 9}from using{col 30}{res}               0{txt}  (_merge==2)

{col 5}matched{col 30}{res}         346,635{txt}  (_merge==3)
{col 5}{hline 41}

{com}. gen foreclosed = (_m==3)
{txt}
{com}. drop _m
{txt}
{com}. 
. * Add buyer names
. merge 1:m v1 using data\raw\zillow\names_zillow.dta, keep(1 3)
{res}
{txt}{col 5}Result{col 38}# of obs.
{col 5}{hline 41}
{col 5}not matched{col 30}{res}       4,557,908
{txt}{col 9}from master{col 30}{res}       4,557,908{txt}  (_merge==1)
{col 9}from using{col 30}{res}               0{txt}  (_merge==2)

{col 5}matched{col 30}{res}       5,897,401{txt}  (_merge==3)
{col 5}{hline 41}

{com}. drop _m 
{txt}
{com}. rename v1 transid
{res}{txt}
{com}. rename First fname_buyer
{res}{txt}
{com}. rename Last lname_buyer
{res}{txt}
{com}. 
. * Clean buyer names
. foreach var of varlist fname_buyer lname_buyer {c -(}
{txt}  2{com}.         replace `var'=upper(trim(itrim(`var')))
{txt}  3{com}.         replace `var'=trim(itrim(`var'))
{txt}  4{com}.         replace `var'=subinstr(`var',".","",.)
{txt}  5{com}.         replace `var'=regexr(`var'," AKA .*$","")
{txt}  6{com}.         replace `var'=regexr(`var',", .*$","")
{txt}  7{com}.         replace `var'=regexr(`var'," JR$| I$| II$| III$| IV$| V$","")
{txt}  8{com}. {c )-}
{txt}(18,657 real changes made)
(0 real changes made)
(6,423 real changes made)
(18 real changes made)
(15,550 real changes made)
(85,072 real changes made)
(186 real changes made)
(0 real changes made)
(59 real changes made)
(0 real changes made)
(1,028 real changes made)
(191,877 real changes made)

{com}. replace fname_buyer=word(fname_buyer,1) // Take first word of first name
{txt}(3,929,915 real changes made)

{com}. 
. *--------------------------------------------------
. * Clean data and save
. *--------------------------------------------------
. 
. * Clean variables
. rename *, lower
{res}{txt}  (all {it:newnames}=={it:oldnames})

{com}. tostring fips, replace
{txt}fips was {res:int} now {res:str5}

{com}. 
. * Reformat dates
. foreach datevar of varlist date_record date_loandue {c -(}
{txt}  2{com}.         gen temp = date(`datevar',"YMD")
{txt}  3{com}.         move temp `datevar'
{txt}  4{com}.         format temp %td
{txt}  5{com}.         drop `datevar'
{txt}  6{com}.         rename temp `datevar'
{txt}  7{com}. {c )-}
{res}{txt}(6,716,623 missing values generated)
{res}{txt}
{com}. 
. * Get year - only keep 1993 onward (very few observations pre-1993)
. gen year = year(date_record)
{txt}
{com}. keep if year >= 1993
{txt}(369 observations deleted)

{com}. 
. * Save
. compress
  {txt}variable {bf}date_record{sf} was {bf}{res}float{sf}{txt} now {bf}{res}int{sf}
  {txt}variable {bf}foreclosed{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}year{sf} was {bf}{res}float{sf}{txt} now {bf}{res}int{sf}
  {txt}variable {bf}fname_buyer{sf} was {bf}{res}str41{sf}{txt} now {bf}{res}str27{sf}
{txt}  (219,553,740 bytes saved)

{com}. save data\transactions_cleaned.dta, replace
{txt}file data\transactions_cleaned.dta saved

{com}. 
. ************************** Link cases to Zillow transactions (buyer name) **********************************
. 
. *--------------------------------------------------
. * Link cases to Zillow transactions
. *--------------------------------------------------
. 
. * Load data
. cd "$workpath"
{res}H:\Dropbox\work\replicationpack
{txt}
{com}. use data\crimcases_address.dta, clear
{txt}
{com}. drop if missing(address)
{txt}(138 observations deleted)

{com}. 
. * Rename variables
. foreach var of varlist zipcode city state {c -(}
{txt}  2{com}.         rename `var' `var'_case
{txt}  3{com}. {c )-}
{res}{txt}
{com}. 
. * Join with Zillow transactions
. joinby address using data\transactions_cleaned.dta, update unm(m)
{txt}
{com}. keep if _m == 3
{txt}(500,347 observations deleted)

{com}. drop _merge
{txt}
{com}. 
. *--------------------------------------------------
. * Create filters
. *--------------------------------------------------
. 
. * Zip code filter
. gen samezip = zipcode == zipcode_case
{txt}
{com}. drop if !samezip
{txt}(562,101 observations deleted)

{com}. 
. *--------------------------------------------------
. * Basic name matching
. *--------------------------------------------------
. 
. * Fullnames
. gen fullname_def = firstname_def + " " + lastname_def
{txt}
{com}. gen fullname_buyer = fname_buyer + " " + lname_buyer
{txt}
{com}. replace fullname_def = trim(itrim(fullname_def))
{txt}(0 real changes made)

{com}. replace fullname_buyer = trim(itrim(fullname_buyer))
{txt}(2,731,530 real changes made)

{com}. 
. * Name match dummy
. gen namematch_full = 0
{txt}
{com}. gen namematch_last = 0
{txt}
{com}. replace namematch_full = 1 if lastname_def == lname_buyer & regexm(fname_buyer,firstname_def) & !missing(lastname_def) & !missing(firstname_def)
{txt}(23,451 real changes made)

{com}. replace namematch_last = 1 if lastname_def == lname_buyer & !missing(lastname_def)      
{txt}(113,756 real changes made)

{com}. 
. * Defendant name match
. gen defnamematch_full = namematch_full
{txt}
{com}. gen defnamematch_last = namematch_last
{txt}
{com}. 
. *--------------------------------------------------
. * Family name matching
. *--------------------------------------------------
. 
. * Buyer ID (sequence number within each case-transaction pair)
. egen buyerid=seq(), by(casenumber transid)
{txt}
{com}. isid casenumber transid buyerid
{txt}
{com}. 
. * Merge with family names
. joinby casenumber transid using data\raw\zillow\relnames_zillow.dta, update unm(m)
{txt}
{com}. drop _merge
{txt}
{com}. rename relative fname_rel
{res}{txt}
{com}. replace fname_rel = trim(itrim(upper(fname_rel)))
{txt}(1,966,873 real changes made)

{com}. drop n_relative source
{txt}
{com}. 
. * Get rid of special characters
. replace fname_rel = subinstr(fname_rel,",","",.)
{txt}(0 real changes made)

{com}. replace fname_rel = subinstr(fname_rel,"/","",.)
{txt}(0 real changes made)

{com}. replace fname_rel = subinstr(fname_rel,".","",.)
{txt}(0 real changes made)

{com}. replace fname_rel = subinstr(fname_rel,`"""',"",.)
{txt}(0 real changes made)

{com}. 
. * Parse family name
. gen firstname_rel = word(fname_rel,1)
{txt}(4,489,795 missing values generated)

{com}. gen lastname_rel = word(fname_rel,2)
{txt}(4,489,795 missing values generated)

{com}. 
. * Family name match
. gen fammatch_full = lastname_def == lastname_rel & !missing(lastname_def) & firstname_def == firstname_rel & !missing(firstname_def)
{txt}
{com}. gen fammatch_last = lastname_def == lastname_rel & !missing(lastname_def)
{txt}
{com}. 
. *--------------------------------------------------
. * Collapse by buyer-defendant
. *--------------------------------------------------
. 
. * Collapse
. drop *name_rel
{txt}
{com}. foreach var of varlist fammatch_full fammatch_last {c -(}
{txt}  2{com}.         egen temp = max(`var'), by(casenumber transid buyerid)
{txt}  3{com}.         replace `var'=temp
{txt}  4{com}.         drop temp
{txt}  5{com}. {c )-}
{txt}(97,201 real changes made)
(245,291 real changes made)

{com}. duplicates drop

{p 0 4}{txt}Duplicates in terms of {txt} all variables{p_end}

(1,926,631 observations deleted)

{com}. isid casenumber transid buyerid
{txt}
{com}. 
. * Include name match
. replace namematch_full = 1 if fammatch_full==1
{txt}(815 real changes made)

{com}. replace namematch_last = 1 if fammatch_last==1
{txt}(1,218 real changes made)

{com}. 
. *--------------------------------------------------
. * Get defendant family members
. *--------------------------------------------------
. 
. * Merge with family names
. joinby casenumber using data\raw\mdcourts\relnames_defendants.dta, update unm(m)
{txt}
{com}. drop _merge
{txt}
{com}. rename relative fname_rel
{res}{txt}
{com}. replace fname_rel = trim(itrim(upper(fname_rel)))
{txt}(45,226,022 real changes made)

{com}. drop n_relative source
{txt}
{com}. cap drop fullname
{txt}
{com}. 
. * Get rid of special characters
. replace fname_rel = subinstr(fname_rel,",","",.)
{txt}(0 real changes made)

{com}. replace fname_rel = subinstr(fname_rel,"/","",.)
{txt}(0 real changes made)

{com}. replace fname_rel = subinstr(fname_rel,".","",.)
{txt}(0 real changes made)

{com}. replace fname_rel = subinstr(fname_rel,`"""',"",.)
{txt}(41 real changes made)

{com}. 
. * Parse family name
. gen firstname_rel = word(fname_rel,1)
{txt}(3,483,001 missing values generated)

{com}. gen lastname_rel = word(fname_rel,2)
{txt}(3,483,001 missing values generated)

{com}. 
. * Family name match
. replace fammatch_full = 1 if lname_buyer == lastname_rel & !missing(lname_buyer) & fname_buyer == firstname_rel & !missing(fname_buyer)
{txt}(25,443 real changes made)

{com}. replace fammatch_last = 1 if lname_buyer == lastname_rel & !missing(lname_buyer)
{txt}(778,350 real changes made)

{com}. 
. *--------------------------------------------------
. * Collapse by buyer-defendant
. *--------------------------------------------------
. 
. * Collapse
. drop *name_rel
{txt}
{com}. foreach var of varlist fammatch_full fammatch_last {c -(}
{txt}  2{com}.         egen temp = max(`var'), by(casenumber transid buyerid)
{txt}  3{com}.         replace `var'=temp
{txt}  4{com}.         drop temp
{txt}  5{com}. {c )-}
{txt}(1,259,604 real changes made)
(1,649,868 real changes made)

{com}. duplicates drop

{p 0 4}{txt}Duplicates in terms of {txt} all variables{p_end}

(44,178,986 observations deleted)

{com}. isid casenumber transid buyerid
{txt}
{com}. 
. * Include name match
. replace namematch_full = 1 if fammatch_full==1
{txt}(21,357 real changes made)

{com}. replace namematch_last = 1 if fammatch_last==1
{txt}(14,783 real changes made)

{com}. 
. *--------------------------------------------------
. * Date gap between hearing and transaction
. *--------------------------------------------------
. 
. * Get number of days between INIT and transaction
. gen dates_init_trsct = date_record - DateINIT1
{txt}
{com}. 
. * Get information on last transaction before case
. egen pretsctdate = max(dates_init_trsct) if dates_init_trsct<0, by(casenumber)
{txt}(2,064,678 missing values generated)

{com}. egen preselldate = max(dates_init_trsct) if dates_init_trsct<0 & !foreclosed, by(casenumber)
{txt}(2,158,232 missing values generated)

{com}. egen prefcldate = max(dates_init_trsct) if dates_init_trsct<0 & foreclosed, by(casenumber)
{txt}(4,436,483 missing values generated)

{com}. gen preprice = salepriceamt if pretsctdate == dates_init_trsct
{txt}(4,289,951 missing values generated)

{com}. gen preloanamt = loanamt if pretsctdate == dates_init_trsct
{txt}(4,058,073 missing values generated)

{com}. gen preloantoprice = preloanamt/preprice if pretsctdate == dates_init_trsct
{txt}(4,297,574 missing values generated)

{com}. gen prebuyer_nml = namematch_last if pretsctdate == dates_init_trsct
{txt}(4,024,137 missing values generated)

{com}. gen prebuyer_nmf = namematch_full if pretsctdate == dates_init_trsct
{txt}(4,024,137 missing values generated)

{com}. gen prebuyer_fml = fammatch_last if pretsctdate == dates_init_trsct
{txt}(4,024,137 missing values generated)

{com}. gen prebuyer_fmf = fammatch_full if pretsctdate == dates_init_trsct
{txt}(4,024,137 missing values generated)

{com}. gen prebuyer_dml = defnamematch_last if pretsctdate == dates_init_trsct
{txt}(4,024,137 missing values generated)

{com}. gen prebuyer_dmf = defnamematch_full if pretsctdate == dates_init_trsct
{txt}(4,024,137 missing values generated)

{com}. gen prematurity = (date_loandue - DateINIT1)/365 if pretsctdate == dates_init_trsct
{txt}(4,387,132 missing values generated)

{com}. 
. * Get information on first transaction after case
. egen posttsctdate = min(dates_init_trsct) if dates_init_trsct>0, by(casenumber)
{txt}(2,465,814 missing values generated)

{com}. egen postfcldate = min(dates_init_trsct) if dates_init_trsct>0 & foreclosed, by(casenumber)
{txt}(4,287,540 missing values generated)

{com}. egen postselldate = min(dates_init_trsct) if dates_init_trsct>0 & !foreclosed, by(casenumber)
{txt}(2,708,311 missing values generated)

{com}. 
. *--------------------------------------------------
. * Generate transaction dummies
. *--------------------------------------------------
. 
. * Transaction and foreclosure dummies - post event
. foreach days in 91 182 365 730 1105 {c -(}   
{txt}  2{com}.         gen sellhouse_`days' = inrange(dates_init_trsct,1,`days') & !foreclosed 
{txt}  3{com}.         gen foreclose_`days' = inrange(dates_init_trsct,1,`days') & foreclosed
{txt}  4{com}. 
.         gen sellhouse_`days'pre = inrange(dates_init_trsct,-`days',-1) & !foreclosed
{txt}  5{com}.         gen foreclose_`days'pre = inrange(dates_init_trsct,-`days',-1) & foreclosed
{txt}  6{com}. {c )-}
{txt}
{com}. rename *91* *3mos*
{res}{txt}
{com}. rename *182* *6mos*
{res}{txt}
{com}. rename *365* *1yr*
{res}{txt}
{com}. rename *730* *2yr*
{res}{txt}
{com}. rename *1105* *3yr*
{res}{txt}
{com}. 
. * Drop if number of matches is above threshold
. egen nummatches = count(DateINIT1), by(casenumber address)
{txt}
{com}. drop if nummatches > 20 
{txt}(1,953,373 observations deleted)

{com}. 
. 
. *--------------------------------------------------
. * Collapse and save
. *--------------------------------------------------
. 
. * Collapse
. gsort casenumber
{txt}
{com}. collapse (max) sellhouse* foreclose* *fcldate *selldate *tsctdate p*price preloanamt prematurity prebuyer_*, by(casenumber)
{txt}
{com}. 
. * Save
. compress
  {txt}variable {bf}sellhouse_3mos{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}sellhouse_3mospre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}sellhouse_6mos{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}sellhouse_6mospre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}sellhouse_1yr{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}sellhouse_1yrpre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}sellhouse_2yr{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}sellhouse_2yrpre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}sellhouse_3yr{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}sellhouse_3yrpre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}foreclose_3mos{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}foreclose_3mospre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}foreclose_6mos{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}foreclose_6mospre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}foreclose_1yr{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}foreclose_1yrpre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}foreclose_2yr{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}foreclose_2yrpre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}foreclose_3yr{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}foreclose_3yrpre{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}prefcldate{sf} was {bf}{res}float{sf}{txt} now {bf}{res}int{sf}
  {txt}variable {bf}postfcldate{sf} was {bf}{res}float{sf}{txt} now {bf}{res}int{sf}
  {txt}variable {bf}preselldate{sf} was {bf}{res}float{sf}{txt} now {bf}{res}int{sf}
  {txt}variable {bf}postselldate{sf} was {bf}{res}float{sf}{txt} now {bf}{res}int{sf}
  {txt}variable {bf}pretsctdate{sf} was {bf}{res}float{sf}{txt} now {bf}{res}int{sf}
  {txt}variable {bf}posttsctdate{sf} was {bf}{res}float{sf}{txt} now {bf}{res}int{sf}
  {txt}variable {bf}prebuyer_nml{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}prebuyer_nmf{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}prebuyer_fml{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}prebuyer_fmf{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}prebuyer_dml{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}prebuyer_dmf{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
{txt}  (44,260,560 bytes saved)

{com}. save data\cases_zillowlinked.dta, replace
{txt}file data\cases_zillowlinked.dta saved

{com}. 
. ************************************************************************************************************
. ********************************* Clean criminal case data ************************************/
. ************************************************************************************************************
. 
. *--------------------------------------------------
. * Load data
. *--------------------------------------------------
. 
. * Load data
. cd "$workpath"
{res}H:\Dropbox\work\replicationpack
{txt}
{com}. use data\raw\mdcourts\crimcases_raw.dta, replace
{txt}
{com}. 
. * Drop duplicate cases
. duplicates drop

{p 0 4}{txt}Duplicates in terms of {txt} all variables{p_end}

(1 observation deleted)

{com}. 
. * Trim strings
. foreach var of varlist Disposition1_ CaseDisposition_ CommentWARI1_4 {c -(}
{txt}  2{com}.         replace `var' = trim(itrim(`var'))
{txt}  3{com}. {c )-}
{txt}(0 real changes made)
(0 real changes made)
(3 real changes made)

{com}. 
. *--------------------------------------------------
. * Hearing date
. *--------------------------------------------------
. 
. * Initial hearing date
. gen date_init = DateINIT1
{txt}
{com}. gen year_init = year(date_init)
{txt}
{com}. gen month_init = month(date_init)
{txt}
{com}. gen dow_init = dow(date_init)
{txt}
{com}. format date_init %td
{txt}
{com}. 
. *--------------------------------------------------
. * Charge information date
. *--------------------------------------------------
. 
. * Charge information
. gen cjisclass = CJISCode1_
{txt}(36,648 missing values generated)

{com}. egen charge = group(cjisclass)
{txt}(36,648 missing values generated)

{com}. joinby cjisclass using data\raw\mdcourts\cjiscode.dta, update unmatched(master)
{txt}
{com}. drop _merge
{txt}
{com}. format date_begin %td
{txt}
{com}. 
. * Keep closest charge date
. gen dategap1 = max(0, date_begin - date_init)
{txt}
{com}. gen dategap2 = max(0, date_init - date_end )
{txt}
{com}. egen dategap = rowmax(dategap1 dategap2)
{txt}
{com}. gsort casenum dategap -date_begin // tie goes to later charge definition
{txt}
{com}. duplicates drop casenum, force

{p 0 4}{txt}Duplicates in terms of {res} casenumber{p_end}

{txt}(1,540,056 observations deleted)

{com}. drop date_begin date_end dategap*
{txt}
{com}. 
. * Charge dummies
. gen charge_felony = felony == "Y"
{txt}
{com}. 
. *--------------------------------------------------
. * Initial hearing information
. *--------------------------------------------------
. 
. * Initial hearing bond status and amount
. replace INIT_Defendant_Status1 = trim(itrim(INIT_Defendant_Status1))
{txt}(511,709 real changes made)

{com}. gen init_statusROR = INIT_Defendant_Status1 == "ROR"
{txt}
{com}. gen init_statusHDOB = INIT_Defendant_Status1 == "HDOB"
{txt}
{com}. gen init_statusHWOB = INIT_Defendant_Status1 == "HWOB"
{txt}
{com}. gen init_bailamt = INIT_Bail_Amount1 
{txt}
{com}. egen init_commissionerid = group(INIT_Commissioner_ID1)
{txt}(4 missing values generated)

{com}. 
. *--------------------------------------------------
. * Defendant characteristics
. *--------------------------------------------------
. 
. * Defendant information
. gen defendant_dob = date(DOB1_,"MDY")
{txt}(2,957 missing values generated)

{com}. format defendant_dob %td
{txt}
{com}. gen defendant_age = year_init - year(defendant_dob)
{txt}(2,957 missing values generated)

{com}. 
. * Male dummy
. gen male = Sex == 2
{txt}
{com}. 
. * log age
. gen ln_age = ln(defendant_age)
{txt}(2,958 missing values generated)

{com}. 
. *--------------------------------------------------
. * Case disposition
. *--------------------------------------------------
. 
. * Guilty disposition dummy
. gen disposition_guilty = regexm(Disposition1_,"^GUILTY")
{txt}
{com}. gen disposition_dismissal = regexm(Disposition1_,"NOLLE PROSEQUI|DISMISSED")
{txt}
{com}. 
. *--------------------------------------------------
. * Bail information
. *--------------------------------------------------
. 
. * Bail review dummy
. replace BALR_Defendant_Status_Code1 = trim(itrim(BALR_Defendant_Status_Code1))
{txt}(25,624 real changes made)

{com}. gen bailreview = DateBALR1 > date_init & !missing(DateBALR1) // bail review held after initial hearing
{txt}
{com}. replace bailreview = DateBALR2 > date_init if !missing(DateBALR2) & DateBALR1 < date_init & !missing(date_init)
{txt}(238 real changes made)

{com}. replace bailreview = DateBALR3 > date_init if !missing(DateBALR3) & DateBALR2 < date_init & !missing(date_init)
{txt}(72 real changes made)

{com}. 
. * Bail amount set at bail review
. gen balr_bailamt = BALR_Bail_Amount1 if !missing(DateBALR1) & DateBALR1 >= date_init & !missing(date_init)
{txt}(646,864 missing values generated)

{com}. replace balr_bailamt = BALR_Bail_Amount2 if !missing(DateBALR2) & DateBALR1 < date_init & !missing(date_init)
{txt}(657 real changes made)

{com}. replace balr_bailamt = BALR_Bail_Amount3 if !missing(DateBALR3) & DateBALR2 < date_init & !missing(date_init)
{txt}(87 real changes made)

{com}. 
. * Final bail amount
. gen final_bailamt = init_bailamt // bail set at initial hearing
{txt}
{com}. replace final_bailamt = balr_bailamt if !missing(balr_bailamt) // replace with bail review bail amount if applicable
{txt}(273,243 real changes made)

{com}. 
. * Natural log
. gen init_lnbailamt = ln(init_bailamt)
{txt}(616,139 missing values generated)

{com}. 
. *--------------------------------------------------
. * Bond information
. *--------------------------------------------------
. 
. * Bond type dummies (conditional on the bond being dated after the initial hearing date)
. replace BOND_Type_of_Bond_Code1 = trim(itrim(BOND_Type_of_Bond_Code1)) 
{txt}(85,516 real changes made)

{com}. foreach bondtype in CASH CORP PCT PARB PROP UPB {c -(}
{txt}  2{com}.         gen bond_`bondtype' = BOND_Type_of_Bond_Code1 == "`bondtype'" & DateBOND1 > date_init & !missing(DateBOND1)
{txt}  3{com}.         replace bond_`bondtype' = BOND_Type_of_Bond_Code2 == "`bondtype'" if !missing(DateBOND2) & DateBOND1 < date_init & DateBOND2 > date_init
{txt}  4{com}.         replace bond_`bondtype' = BOND_Type_of_Bond_Code3 == "`bondtype'" if !missing(DateBOND3) & DateBOND2 < date_init & DateBOND3 > date_init
{txt}  5{com}. {c )-}
{txt}(6 real changes made)
(2 real changes made)
(37 real changes made)
(0 real changes made)
(0 real changes made)
(0 real changes made)
(0 real changes made)
(0 real changes made)
(20 real changes made)
(1 real change made)
(0 real changes made)
(0 real changes made)

{com}. rename bond_*, lower
{res}{txt}
{com}. 
. * Other bond type dummies
. gen bond_other = bond_upb | bond_pct | bond_parb
{txt}
{com}. gen bond_parpct = bond_pct == 1 | bond_parb == 1
{txt}
{com}. gen normalbond = bond_prop|bond_cash|bond_corp|bond_parb|bond_pct|bond_upb
{txt}
{com}. gen upfrontbond = bond_prop|bond_cash|bond_corp
{txt}
{com}. 
. *--------------------------------------------------
. * Warrant information
. *--------------------------------------------------
. 
. * Failure to appear
. gen wari_fta = CommentWARI1_4 == "FTA" & DateWARI1 > DateINIT1 & !missing(DateWARI1)
{txt}
{com}. replace wari_fta = CommentWARI2_4 == "FTA" if !missing(DateWARI2) & DateWARI1 < DateINIT1 & DateWARI2 > DateINIT1
{txt}(25,174 real changes made)

{com}. replace wari_fta = CommentWARI3_4 == "FTA" if !missing(DateWARI3) & DateWARI2 < DateINIT1 & DateWARI3 > DateINIT1
{txt}(394 real changes made)

{com}. 
. * Recidivism
. gen wari_rescid = inlist(CommentWARI1_4,"ARR","VOP","OTH") & DateWARI1 > DateINIT1 & !missing(DateWARI1)
{txt}
{com}. replace wari_rescid = inlist(CommentWARI2_4,"ARR","VOP","OTH") if !missing(DateWARI2) & DateWARI1 < DateINIT1 & DateWARI2 > DateINIT1
{txt}(9,778 real changes made)

{com}. replace wari_rescid = inlist(CommentWARI3_4,"ARR","VOP","OTH") if !missing(DateWARI3) & DateWARI2 < DateINIT1 & DateWARI3 > DateINIT1
{txt}(78 real changes made)

{com}. 
. *--------------------------------------------------
. * Detention length
. *--------------------------------------------------
. 
. * Detention length
. gen timedetained = DateRELS1 - DateCMIT1 if DateINIT1 == DateCMIT1 & DateRELS1 >= DateCMIT1 // default
{txt}(689,586 missing values generated)

{com}. 
. * Zero if the first commitment comes after the initial hearing and after a warrant (i.e., it follows a subsequent warrant)
. replace timedetained = 0 if DateINIT1 < DateCMIT1 & !missing(DateCMIT1) & DateCMIT1 > DateWARI1 
{txt}(86,174 real changes made)

{com}. replace timedetained = 0 if DateINIT1 < DateCMIT1 & !missing(DateCMIT1) & DateCMIT1 > DateWARI2 & DateWARI1 < DateINIT1 
{txt}(0 real changes made)

{com}. replace timedetained = 0 if DateINIT1 < DateCMIT1 & !missing(DateCMIT1) & DateCMIT1 > DateWARI3 & DateWARI2 < DateINIT1 
{txt}(0 real changes made)

{com}. 
. * Other instance of not detained
. replace timedetained = 0 if missing(DateCMIT1) // no commitment
{txt}(442,637 real changes made)

{com}. replace timedetained = 0 if init_statusROR == 1 // no detention time if released
{txt}(10,005 real changes made)

{com}. 
. * Timely release dummy (detention length of at most 1 day)
. gen timelyrelease1 = inrange(timedetained,0,1)
{txt}
{com}. 
. *--------------------------------------------------
. * Counts and fixed effects
. *--------------------------------------------------
. 
. * Number of cases per commissioner
. egen numcases_percomm = count(date_init), by(init_commissionerid)
{txt}
{com}. egen numcases_percommyr = count(date_init), by(init_commissionerid year_init)
{txt}
{com}. 
. * Court by year/month/dow fixed effects
. egen courtyr = group(Court year_init)
{res}{txt}
{com}. egen courtmth = group(Court month_init)
{res}{txt}
{com}. egen courtdow = group(Court dow_init)
{res}{txt}
{com}. egen courtyrdow = group(Court year_init dow_init)
{res}{txt}
{com}. egen courtmthdow = group(Court month_init dow_init)
{res}{txt}
{com}. 
. *--------------------------------------------------
. * Construct IV
. *--------------------------------------------------
. 
. * Drop if missing commissioner ID
. drop if missing(INIT_Commissioner_ID1)
{txt}(4 observations deleted)

{com}. 
. * Construct residuals for detention decision 
. local ctrlvars "ln_age male"
{txt}
{com}. local fevars "chargeid=i.charge raceid=i.Race courtyrdowid=i.courtyrdow courtmthdowid=i.courtmthdow"
{txt}
{com}. foreach type in "HDOB" "HWOB" "ROR" {c -(}
{txt}  2{com}.         reghdfe init_status`type' `ctrlvars', absorb(`fevars') residuals(e2_`type')
{txt}  3{com}.         drop chargeid raceid courtyrdowid courtmthdowid
{txt}  4{com}. {c )-}
{res}{txt}(dropped 430 {browse "http://scorreia.com/research/singletons.pdf":singleton observations})
{res}{txt}({browse "http://scorreia.com/research/hdfe.pdf":MWFE estimator} converged in 8 iterations)
{res}
{txt}HDFE Linear regression{col 51}Number of obs{col 67}= {res} 1,160,741
{txt}Absorbing 4 HDFE groups{col 51}F({res}   2{txt},{res}1158947{txt}){col 67}= {res}   2565.28
{txt}{col 51}Prob > F{col 67}= {res}    0.0000
{txt}{col 51}R-squared{col 67}= {res}    0.2085
{txt}{col 51}Adj R-squared{col 67}= {res}    0.2073
{txt}{col 51}Within R-sq.{col 67}= {res}    0.0044
{txt}{col 51}Root MSE{col 67}= {res}    0.4452

{txt}{hline 13}{c TT}{hline 11}{hline 11}{hline 9}{hline 8}{hline 13}{hline 12}
{col 1}init_sta~DOB{col 14}{c |}      Coef.{col 26}   Std. Err.{col 38}      t{col 46}   P>|t|{col 54}     [95% Con{col 67}f. Interval]
{hline 13}{c +}{hline 11}{hline 11}{hline 9}{hline 8}{hline 13}{hline 12}
{space 6}ln_age {c |}{col 14}{res}{space 2}-.0296626{col 26}{space 2} .0013093{col 37}{space 1}  -22.66{col 46}{space 3}0.000{col 54}{space 4}-.0322287{col 67}{space 3}-.0270965
{txt}{space 8}male {c |}{col 14}{res}{space 2} .0769075{col 26}{space 2} .0011391{col 37}{space 1}   67.52{col 46}{space 3}0.000{col 54}{space 4} .0746749{col 67}{space 3} .0791401
{txt}{space 7}_cons {c |}{col 14}{res}{space 2} .5369143{col 26}{space 2} .0046332{col 37}{space 1}  115.88{col 46}{space 3}0.000{col 54}{space 4} .5278333{col 67}{space 3} .5459953
{txt}{hline 13}{c BT}{hline 11}{hline 11}{hline 9}{hline 8}{hline 13}{hline 12}
{res}
{txt}Absorbed degrees of freedom:
{res}{col 1}{text}{hline 13}{c TT}{hline 12}{hline 12}{hline 14}{hline 1}{c TRC}
{col 1}{text} Absorbed FE{col 14}{c |} Categories{col 27} - Redundant{col 39}  = Num. Coefs{col 54}{c |}
{res}{col 1}{text}{hline 13}{c +}{hline 12}{hline 12}{hline 14}{hline 1}{c RT}
{col 1}{text}      charge{col 14}{c |}{space 1}     1117{col 27}{space 1}        0{col 39}{result}{space 1}     1117{col 53}{text} {col 54}{c |}
{res}{col 1}{text}        Race{col 14}{c |}{space 1}        5{col 27}{space 1}        1{col 39}{result}{space 1}        4{col 53}{text} {col 54}{c |}
{res}{col 1}{text}  courtyrdow{col 14}{c |}{space 1}      441{col 27}{space 1}        1{col 39}{result}{space 1}      440{col 53}{text}?{col 54}{c |}
{res}{col 1}{text} courtmthdow{col 14}{c |}{space 1}      252{col 27}{space 1}       21{col 39}{result}{space 1}      231{col 53}{text}?{col 54}{c |}
{res}{col 1}{text}{hline 13}{c BT}{hline 12}{hline 12}{hline 14}{hline 1}{c BRC}
? = number of redundant parameters may be higher
{res}{txt}(dropped 430 {browse "http://scorreia.com/research/singletons.pdf":singleton observations})
{res}{txt}({browse "http://scorreia.com/research/hdfe.pdf":MWFE estimator} converged in 8 iterations)
{res}
{txt}HDFE Linear regression{col 51}Number of obs{col 67}= {res} 1,160,741
{txt}Absorbing 4 HDFE groups{col 51}F({res}   2{txt},{res}1158947{txt}){col 67}= {res}   1310.23
{txt}{col 51}Prob > F{col 67}= {res}    0.0000
{txt}{col 51}R-squared{col 67}= {res}    0.3742
{txt}{col 51}Adj R-squared{col 67}= {res}    0.3732
{txt}{col 51}Within R-sq.{col 67}= {res}    0.0023
{txt}{col 51}Root MSE{col 67}= {res}    0.2120

{txt}{hline 13}{c TT}{hline 11}{hline 11}{hline 9}{hline 8}{hline 13}{hline 12}
{col 1}init_sta~WOB{col 14}{c |}      Coef.{col 26}   Std. Err.{col 38}      t{col 46}   P>|t|{col 54}     [95% Con{col 67}f. Interval]
{hline 13}{c +}{hline 11}{hline 11}{hline 9}{hline 8}{hline 13}{hline 12}
{space 6}ln_age {c |}{col 14}{res}{space 2} .0031084{col 26}{space 2} .0006234{col 37}{space 1}    4.99{col 46}{space 3}0.000{col 54}{space 4} .0018865{col 67}{space 3} .0043304
{txt}{space 8}male {c |}{col 14}{res}{space 2} .0276804{col 26}{space 2} .0005424{col 37}{space 1}   51.03{col 46}{space 3}0.000{col 54}{space 4} .0266173{col 67}{space 3} .0287435
{txt}{space 7}_cons {c |}{col 14}{res}{space 2} .0441985{col 26}{space 2} .0022062{col 37}{space 1}   20.03{col 46}{space 3}0.000{col 54}{space 4} .0398744{col 67}{space 3} .0485227
{txt}{hline 13}{c BT}{hline 11}{hline 11}{hline 9}{hline 8}{hline 13}{hline 12}
{res}
{txt}Absorbed degrees of freedom:
{res}{col 1}{text}{hline 13}{c TT}{hline 12}{hline 12}{hline 14}{hline 1}{c TRC}
{col 1}{text} Absorbed FE{col 14}{c |} Categories{col 27} - Redundant{col 39}  = Num. Coefs{col 54}{c |}
{res}{col 1}{text}{hline 13}{c +}{hline 12}{hline 12}{hline 14}{hline 1}{c RT}
{col 1}{text}      charge{col 14}{c |}{space 1}     1117{col 27}{space 1}        0{col 39}{result}{space 1}     1117{col 53}{text} {col 54}{c |}
{res}{col 1}{text}        Race{col 14}{c |}{space 1}        5{col 27}{space 1}        1{col 39}{result}{space 1}        4{col 53}{text} {col 54}{c |}
{res}{col 1}{text}  courtyrdow{col 14}{c |}{space 1}      441{col 27}{space 1}        1{col 39}{result}{space 1}      440{col 53}{text}?{col 54}{c |}
{res}{col 1}{text} courtmthdow{col 14}{c |}{space 1}      252{col 27}{space 1}       21{col 39}{result}{space 1}      231{col 53}{text}?{col 54}{c |}
{res}{col 1}{text}{hline 13}{c BT}{hline 12}{hline 12}{hline 14}{hline 1}{c BRC}
? = number of redundant parameters may be higher
{res}{txt}(dropped 430 {browse "http://scorreia.com/research/singletons.pdf":singleton observations})
{res}{txt}({browse "http://scorreia.com/research/hdfe.pdf":MWFE estimator} converged in 8 iterations)
{res}
{txt}HDFE Linear regression{col 51}Number of obs{col 67}= {res} 1,160,741
{txt}Absorbing 4 HDFE groups{col 51}F({res}   2{txt},{res}1158947{txt}){col 67}= {res}   5067.72
{txt}{col 51}Prob > F{col 67}= {res}    0.0000
{txt}{col 51}R-squared{col 67}= {res}    0.2880
{txt}{col 51}Adj R-squared{col 67}= {res}    0.2869
{txt}{col 51}Within R-sq.{col 67}= {res}    0.0087
{txt}{col 51}Root MSE{col 67}= {res}    0.4173

{txt}{hline 13}{c TT}{hline 11}{hline 11}{hline 9}{hline 8}{hline 13}{hline 12}
{col 1}init_statu~R{col 14}{c |}      Coef.{col 26}   Std. Err.{col 38}      t{col 46}   P>|t|{col 54}     [95% Con{col 67}f. Interval]
{hline 13}{c +}{hline 11}{hline 11}{hline 9}{hline 8}{hline 13}{hline 12}
{space 6}ln_age {c |}{col 14}{res}{space 2} .0263451{col 26}{space 2} .0012272{col 37}{space 1}   21.47{col 46}{space 3}0.000{col 54}{space 4} .0239398{col 67}{space 3} .0287504
{txt}{space 8}male {c |}{col 14}{res}{space 2}-.1045743{col 26}{space 2} .0010677{col 37}{space 1}  -97.94{col 46}{space 3}0.000{col 54}{space 4} -.106667{col 67}{space 3}-.1024816
{txt}{space 7}_cons {c |}{col 14}{res}{space 2}  .419418{col 26}{space 2} .0043429{col 37}{space 1}   96.58{col 46}{space 3}0.000{col 54}{space 4} .4109061{col 67}{space 3} .4279299
{txt}{hline 13}{c BT}{hline 11}{hline 11}{hline 9}{hline 8}{hline 13}{hline 12}
{res}
{txt}Absorbed degrees of freedom:
{res}{col 1}{text}{hline 13}{c TT}{hline 12}{hline 12}{hline 14}{hline 1}{c TRC}
{col 1}{text} Absorbed FE{col 14}{c |} Categories{col 27} - Redundant{col 39}  = Num. Coefs{col 54}{c |}
{res}{col 1}{text}{hline 13}{c +}{hline 12}{hline 12}{hline 14}{hline 1}{c RT}
{col 1}{text}      charge{col 14}{c |}{space 1}     1117{col 27}{space 1}        0{col 39}{result}{space 1}     1117{col 53}{text} {col 54}{c |}
{res}{col 1}{text}        Race{col 14}{c |}{space 1}        5{col 27}{space 1}        1{col 39}{result}{space 1}        4{col 53}{text} {col 54}{c |}
{res}{col 1}{text}  courtyrdow{col 14}{c |}{space 1}      441{col 27}{space 1}        1{col 39}{result}{space 1}      440{col 53}{text}?{col 54}{c |}
{res}{col 1}{text} courtmthdow{col 14}{c |}{space 1}      252{col 27}{space 1}       21{col 39}{result}{space 1}      231{col 53}{text}?{col 54}{c |}
{res}{col 1}{text}{hline 13}{c BT}{hline 12}{hline 12}{hline 14}{hline 1}{c BRC}
? = number of redundant parameters may be higher
{res}{txt}
{com}. 
. * Construct detention IV based on leave-out averages
. foreach type in "ROR" {c -(}
{txt}  2{com}.         egen sum`type'_comm = sum(init_status`type'), by(year_init init_commissionerid)
{txt}  3{com}.         egen count`type'_comm = count(init_status`type'), by(year_init init_commissionerid)
{txt}  4{com}.         gen iv`type'_comm = (sum`type'_comm - init_status`type')/(count`type'_comm - 1)
{txt}  5{com}.         drop sum`type'* count`type'* 
{txt}  6{com}. {c )-}
{txt}(627 missing values generated)

{com}. 
. * Construct detention IV based on residualized leave-out means
. foreach type in "ROR" {c -(}
{txt}  2{com}.         egen sum`type'_comm = sum(e2_`type'), by(year_init init_commissionerid)
{txt}  3{com}.         egen count`type'_comm = count(e2_`type'), by(year_init init_commissionerid)
{txt}  4{com}.         gen rivt`type'_comm = (sum`type'_comm - e2_`type')/(count`type'_comm - 1)       
{txt}  5{com}.         drop sum`type'* count`type'* 
{txt}  6{com}. {c )-}
{txt}(44,504 missing values generated)

{com}. 
. *--------------------------------------------------
. * Other fixed effects, winsorize, and save
. *--------------------------------------------------
. 
. * Fixed effects
. egen chargeyr = group(charge year_init)
{res}{txt}(36,648 missing values generated)

{com}. egen zipyr = group(ZipCode1_ year_init)
{res}{txt}(388 missing values generated)

{com}. egen sexrace = group(Sex Race)
{res}{txt}(4,813 missing values generated)

{com}. 
. * Winsorize
. winsor2 final_*bailamt init_*bailamt timedetained, cuts(1 99) replace
{txt}
{com}. 
. * Natural logs
. gen ln_timedetained = ln(1+timedetained)
{txt}(151,024 missing values generated)

{com}. 
. * Save file
. compress
  {txt}variable {bf}date_init{sf} was {bf}{res}float{sf}{txt} now {bf}{res}int{sf}
  {txt}variable {bf}year_init{sf} was {bf}{res}float{sf}{txt} now {bf}{res}int{sf}
  {txt}variable {bf}month_init{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}dow_init{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}charge{sf} was {bf}{res}float{sf}{txt} now {bf}{res}int{sf}
  {txt}variable {bf}charge_felony{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}init_statusROR{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}init_statusHDOB{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}init_statusHWOB{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}init_commissionerid{sf} was {bf}{res}float{sf}{txt} now {bf}{res}int{sf}
  {txt}variable {bf}defendant_dob{sf} was {bf}{res}float{sf}{txt} now {bf}{res}int{sf}
  {txt}variable {bf}defendant_age{sf} was {bf}{res}float{sf}{txt} now {bf}{res}int{sf}
  {txt}variable {bf}male{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}disposition_guilty{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}disposition_dismissal{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bailreview{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bond_cash{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bond_corp{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bond_pct{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bond_parb{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bond_prop{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bond_upb{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bond_other{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}bond_parpct{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}normalbond{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}upfrontbond{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}wari_fta{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}wari_rescid{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}timedetained{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}timelyrelease1{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}numcases_percomm{sf} was {bf}{res}float{sf}{txt} now {bf}{res}int{sf}
  {txt}variable {bf}numcases_percommyr{sf} was {bf}{res}float{sf}{txt} now {bf}{res}int{sf}
  {txt}variable {bf}courtyr{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}courtmth{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}courtdow{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}courtyrdow{sf} was {bf}{res}float{sf}{txt} now {bf}{res}int{sf}
  {txt}variable {bf}courtmthdow{sf} was {bf}{res}float{sf}{txt} now {bf}{res}int{sf}
  {txt}variable {bf}chargeyr{sf} was {bf}{res}float{sf}{txt} now {bf}{res}int{sf}
  {txt}variable {bf}zipyr{sf} was {bf}{res}float{sf}{txt} now {bf}{res}int{sf}
  {txt}variable {bf}sexrace{sf} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}describe70{sf} was {bf}{res}str62{sf}{txt} now {bf}{res}str60{sf}
  {txt}variable {bf}describe100{sf} was {bf}{res}str71{sf}{txt} now {bf}{res}str61{sf}
{txt}  (144,555,000 bytes saved)

{com}. save data\mdcourtsdata.dta, replace
{txt}file data\mdcourtsdata.dta saved

{com}. 
. **************************************************************************************************************
. 
. * Close log
. log close
      {txt}name:  {res}<unnamed>
       {txt}log:  {res}H:\Dropbox\work\replicationpack\logs\1_CleanData.smcl
  {txt}log type:  {res}smcl
 {txt}closed on:  {res}29 Nov 2024, 20:59:46
{txt}{.-}
{smcl}
{txt}{sf}{ul off}